From 2723b42b3ab06bcce01af2fd74c91a85cef69752 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Mon, 10 Jun 2024 11:38:46 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- index.html | 252 +++--- listings.json | 1 + posts/2013-04-01-download_weather.html | 10 +- posts/2013-05-01-aggregation-timeseries.html | 26 +- posts/2013-06-01-hmm_simulate.html | 2 +- posts/2013-07-01-hmm_continuous.html | 2 +- posts/2013-09-01-denoising.html | 2 +- posts/2014-05-01-dtw.html | 2 +- posts/2014-05-01-gibbs-sampling.html | 2 +- posts/2014-06-01-em.html | 2 +- posts/2014-06-02-latexify.html | 2 +- posts/2014-07-01-mcmc_coins.html | 2 +- posts/2017-04-19-nmf-out-matrix.html | 22 +- posts/2017-04-20-parafac-out-tensor.html | 2 +- posts/2017-04-21-constrained-nmf-cvx.html | 18 +- posts/2017-06-14-widgets-matplotlib.html | 6 +- posts/2017-06-15-linear-regression-prior.html | 2 +- posts/2017-08-02-fifty-ggplot-python-1.html | 6 +- ...08-12-linear-regression-adagrad-vs-gd.html | 2 +- posts/2017-08-13-mf-autograd-adagrad.html | 6 +- posts/2017-12-18-recommend-keras.html | 30 +- ...-12-29-neural-collaborative-filtering.html | 14 +- posts/2018-01-07-cs-phd-lessons.html | 2 +- posts/2018-01-13-denoising.html | 2 +- posts/2018-06-16-active-committee.html | 2 +- posts/2018-06-21-aq-india-map.html | 6 +- posts/2018-06-26-map-electricity-access.html | 18 +- ...-placement-preparation-2018-1-hashmap.html | 2 +- posts/2019-08-20-gaussian-processes.html | 2 +- posts/2020-01-14-test-markdown-post.html | 4 +- ...2020-02-20-bayesian-linear-regression.html | 2 +- posts/2020-02-28-xor-relu-vector.html | 2 +- posts/2020-03-02-linear-scratch.html | 2 +- posts/2020-03-08-keras-neural-non-linear.html | 2 +- posts/2020-03-26-gp.html | 18 +- ...rning_with_bayesian_linear_regression.html | 2 +- posts/2020-03-29-param-learning.html | 6 +- posts/2020-04-16-inverse-transform.html | 2 +- posts/2020-06-26-gp-understand.html | 6 +- posts/2021-05-31-gan.html | 64 +- posts/2021-06-12-setup-mac.html | 2 +- posts/2021-06-14-setup-ipad.html | 2 +- posts/2021-06-16-shortcuts-ipad.html | 2 +- posts/2021-06-17-python-ssh.html | 2 +- posts/2021-06-18-audio-filters.html | 8 +- posts/2021-06-19-blur-affinity.html | 2 +- posts/2021-08-20-bayesian.html | 2 +- posts/2021-09-01-hello-julia-language.html | 2 +- posts/2021-09-03-param-learning-sgd.html | 6 +- posts/2022-01-26-tfp-distributions.html | 6 +- posts/2022-01-28-tfp-linear-regression.html | 2 +- posts/2022-01-29-kl-divergence.html | 2 +- posts/2022-02-04-sampling-normal.html | 2 +- posts/2022-02-05-lr.html | 2 +- posts/2022-02-05-simple-dgm.html | 2 +- posts/2022-02-07-coin-toss.html | 6 +- posts/2022-02-09-autograd-pytorch-jax.html | 2 +- posts/2022-02-09-pytorch-learn-normal.html | 2 +- posts/2022-02-11-matrix.html | 2 +- .../2022-02-11-pytorch-learn-normal-map.html | 2 +- posts/2022-02-12-variational-inference.html | 2 +- posts/2022-02-14-gmm.html | 2 +- posts/2022-02-14-logistic-regression.html | 2 +- posts/2022-02-15-draw-graphical-models.html | 2 +- posts/2022-02-17-ppca.html | 2 +- posts/2022-02-17-pyro-linreg.html | 2 +- posts/2022-02-20-condition-pyro.html | 2 +- ...2022-02-21-coordinate-descent-failure.html | 2 +- posts/2022-02-24-audio-filtering.html | 2 +- posts/2022-10-25-mogp.html | 6 +- posts/2022-10-27-calibration.html | 2 +- ...2-11-20-binomial-poisson-distribution.html | 2 +- posts/2023-01-19-conformal-intro.html | 10 +- posts/2024-06-10-shortcuts-mac.html | 779 ++++++++++++++++++ posts/2024-attention.html | 10 +- posts/2024-forecast.html | 2 +- posts/2024-rnn.html | 6 +- posts/2024-sample-distribution.html | 2 +- posts/attention-sequence.html | 2 +- posts/auto-pytorch.html | 34 +- posts/autoencoder.html | 10 +- posts/bald.html | 2 +- posts/comparing-gp.html | 2 +- posts/fsgm.html | 2 +- posts/logo.html | 2 +- posts/moe.html | 2 +- posts/mv-taylor.html | 2 +- posts/mvn-nn.html | 2 +- posts/networkx-trees.html | 2 +- posts/np.html | 2 +- posts/object-detection.html | 2 +- posts/pinn.html | 2 +- posts/positional-encoding.html | 2 +- posts/residual-torch.html | 2 +- posts/rl.html | 2 +- posts/siren-paper-impl.html | 6 +- posts/siren-paper.html | 2 +- posts/sr.html | 2 +- posts/stacking.html | 2 +- posts/stakcing.html | 2 +- posts/strassen.html | 2 +- posts/svd.html | 2 +- posts/sympy.html | 2 +- posts/tensorboard.html | 2 +- posts/torch-likelihoods.html | 2 +- posts/towards-transformers.html | 6 +- posts/transcript.html | 2 +- posts/vscode-tips/index.html | 4 +- posts/welcome/index.html | 2 +- search.json | 89 +- setup/2024/06/12/shortuts-mac/index.html | 14 + sitemap.xml | 222 ++--- 113 files changed, 1294 insertions(+), 597 deletions(-) create mode 100644 posts/2024-06-10-shortcuts-mac.html create mode 100644 setup/2024/06/12/shortuts-mac/index.html diff --git a/.nojekyll b/.nojekyll index de02490..b2336a6 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -0a65ce19 \ No newline at end of file +8af02f66 \ No newline at end of file diff --git a/index.html b/index.html index a314a37..b7faeff 100644 --- a/index.html +++ b/index.html @@ -2,7 +2,7 @@ - + @@ -184,7 +184,7 @@

Blog

+
Categories
All (107)
JAX (1)
LA (1)
ML (88)
PyTorch (3)
TF (3)
TFP (3)
academia (2)
air quality (1)
markdown (1)
ml (2)
news (1)
setup (6)
sustainability (1)
visualisation (5)
vscode (1)
@@ -198,7 +198,37 @@
Categories
-
+
+
+

+
 
+

+
+
+

+Keyboard shortcuts on Mac +

+
+ +
+
+
+setup +
+
+ +
+ +
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
@@ -1188,7 +1218,7 @@

-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+

 
@@ -1450,7 +1480,7 @@

- +
@@ -1492,7 +1522,7 @@

-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+

Shortcut
+
@@ -352,7 +351,6 @@

Downloading weather data

-

Now, we need to fix the timezone.

df = df.tz_localize("Asia/Kolkata").tz_convert("US/Central")
@@ -360,10 +358,9 @@

Downloading weather data

df.head()
-
-
+
@@ -403,7 +400,6 @@

Downloading weather data

-

I’ll now export this file to a CSV to use it for following demonstrations on aggregations on time series.

df.to_csv("weather.csv")
diff --git a/posts/2013-05-01-aggregation-timeseries.html b/posts/2013-05-01-aggregation-timeseries.html index feb56af..3c98e37 100644 --- a/posts/2013-05-01-aggregation-timeseries.html +++ b/posts/2013-05-01-aggregation-timeseries.html @@ -2,7 +2,7 @@ - + @@ -212,9 +212,8 @@

On this page

df.head()
-
-
+
@@ -254,7 +253,6 @@

On this page

-

Question 1: What is the mean temperature and humidity per hour of the day?

We’ll create a new column in the df containing the hour information from the index.

@@ -265,9 +263,8 @@

df.head()
-
-

+
@@ -313,15 +310,13 @@

mean_temp_humidity = df.groupby("hour").mean()
 mean_temp_humidity.head()
-
-

+
@@ -366,7 +361,6 @@

mean_temp_humidity.plot(subplots=True);
@@ -385,9 +379,8 @@

mean_temp_humidity_pivoting.head()

-
-

+
@@ -432,7 +425,6 @@

@@ -445,9 +437,8 @@

df.head()
-
-

+
@@ -499,7 +490,6 @@

daily_temp = pd.pivot_table(df, index=["hour"], columns=["day"], values=["temperature"])
@@ -507,9 +497,8 @@

daily_temp.head()
-
-

+
@@ -692,7 +681,6 @@

daily_temp.plot(style='k-', alpha=0.3, legend=False)
 plt.ylabel("Temp");
diff --git a/posts/2013-06-01-hmm_simulate.html b/posts/2013-06-01-hmm_simulate.html index 4d716e1..c09c0f1 100644 --- a/posts/2013-06-01-hmm_simulate.html +++ b/posts/2013-06-01-hmm_simulate.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2013-07-01-hmm_continuous.html b/posts/2013-07-01-hmm_continuous.html index 7838fe1..ca11131 100644 --- a/posts/2013-07-01-hmm_continuous.html +++ b/posts/2013-07-01-hmm_continuous.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2013-09-01-denoising.html b/posts/2013-09-01-denoising.html index 5f40c9d..8d7084b 100644 --- a/posts/2013-09-01-denoising.html +++ b/posts/2013-09-01-denoising.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2014-05-01-dtw.html b/posts/2014-05-01-dtw.html index 28abe56..086c503 100644 --- a/posts/2014-05-01-dtw.html +++ b/posts/2014-05-01-dtw.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2014-05-01-gibbs-sampling.html b/posts/2014-05-01-gibbs-sampling.html index 2f5b5b3..44a04c5 100644 --- a/posts/2014-05-01-gibbs-sampling.html +++ b/posts/2014-05-01-gibbs-sampling.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2014-06-01-em.html b/posts/2014-06-01-em.html index 2ecdd00..6fab676 100644 --- a/posts/2014-06-01-em.html +++ b/posts/2014-06-01-em.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2014-06-02-latexify.html b/posts/2014-06-02-latexify.html index 58b4af6..c6bcb25 100644 --- a/posts/2014-06-02-latexify.html +++ b/posts/2014-06-02-latexify.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2014-07-01-mcmc_coins.html b/posts/2014-07-01-mcmc_coins.html index 31ee65d..e19bb41 100644 --- a/posts/2014-07-01-mcmc_coins.html +++ b/posts/2014-07-01-mcmc_coins.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2017-04-19-nmf-out-matrix.html b/posts/2017-04-19-nmf-out-matrix.html index 034c4b3..932a7ed 100644 --- a/posts/2017-04-19-nmf-out-matrix.html +++ b/posts/2017-04-19-nmf-out-matrix.html @@ -2,7 +2,7 @@ - + @@ -272,9 +272,8 @@

Defining matrix A

pd.DataFrame(A_orig).head()
-
-
+
@@ -362,7 +361,6 @@

Defining matrix A

-

Masking a few entries

@@ -389,9 +387,8 @@

Masking a few entrie A_df.head()
-
-

+
@@ -479,7 +476,6 @@

Masking a few entrie -

Defining matrices W and H (learning on M-1 users and N movies)

@@ -494,9 +490,8 @@

pd.DataFrame(W).head()
-
-

+
@@ -548,14 +543,12 @@

pd.DataFrame(H).head()
-
-

+
@@ -630,7 +623,6 @@

Defining the cost that we want to minimise

@@ -701,9 +693,8 @@

Alternating NNL A_pred.head()
-
-

+
@@ -791,7 +782,6 @@

Alternating NNL -

Learning home factors for \(M^{th}\) home

diff --git a/posts/2017-04-20-parafac-out-tensor.html b/posts/2017-04-20-parafac-out-tensor.html index 6125395..339918d 100644 --- a/posts/2017-04-20-parafac-out-tensor.html +++ b/posts/2017-04-20-parafac-out-tensor.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2017-04-21-constrained-nmf-cvx.html b/posts/2017-04-21-constrained-nmf-cvx.html index 75b577e..125c31c 100644 --- a/posts/2017-04-21-constrained-nmf-cvx.html +++ b/posts/2017-04-21-constrained-nmf-cvx.html @@ -2,7 +2,7 @@ - + @@ -220,9 +220,8 @@

Creating a ratin pd.DataFrame(A).head()
-
-

+
@@ -322,7 +321,6 @@

Creating a ratin -

We can see that for each user, the 0th item has higher rating compared to the 5th, 1st more than the 6th and so on. Now, in our alternating least squares implementation, we break down A as Y.X. Here X has dimensions of K, N. To ensure the relationship among the items, we will put contraints on X of the form: X[:, 0] > 2 x X[:, 5] and so on. We will create a simple for loop for the same.

e = "["
@@ -400,9 +398,8 @@ 

pd.DataFrame(X)

-
-

+
@@ -472,14 +469,12 @@

-
pd.DataFrame(X_c)
-
-

+
@@ -549,7 +544,6 @@

-

Ok. The obtained X matrix looks fairly similar. How about we reverse the constraints.

e_rev = "["
@@ -568,9 +562,8 @@ 

pd.DataFrame(X_c_rev)

-
-

+
@@ -640,7 +633,6 @@

-

There you go! We now have learnt latent factors that conform to our constraints.

diff --git a/posts/2017-06-14-widgets-matplotlib.html b/posts/2017-06-14-widgets-matplotlib.html index c5dd1d8..35eac58 100644 --- a/posts/2017-06-14-widgets-matplotlib.html +++ b/posts/2017-06-14-widgets-matplotlib.html @@ -2,7 +2,7 @@ - + @@ -235,9 +235,8 @@

Creating the data

df.head()[range(5)]
-
-
+
@@ -295,7 +294,6 @@

Creating the data

-
fig, ax  = plt.subplots()
 df.plot(ax=ax)
diff --git a/posts/2017-06-15-linear-regression-prior.html b/posts/2017-06-15-linear-regression-prior.html index 33422a3..926e515 100644 --- a/posts/2017-06-15-linear-regression-prior.html +++ b/posts/2017-06-15-linear-regression-prior.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2017-08-02-fifty-ggplot-python-1.html b/posts/2017-08-02-fifty-ggplot-python-1.html index 89d58de..e1cbd49 100644 --- a/posts/2017-08-02-fifty-ggplot-python-1.html +++ b/posts/2017-08-02-fifty-ggplot-python-1.html @@ -2,7 +2,7 @@ - + @@ -283,9 +283,8 @@

Getting the data

midwest.head().loc[:, ['area'] ]
-
-
+
@@ -319,7 +318,6 @@

Getting the data

-

Default Pandas scatter plot with marker size by population density

diff --git a/posts/2017-08-12-linear-regression-adagrad-vs-gd.html b/posts/2017-08-12-linear-regression-adagrad-vs-gd.html index 4282729..a8ff155 100644 --- a/posts/2017-08-12-linear-regression-adagrad-vs-gd.html +++ b/posts/2017-08-12-linear-regression-adagrad-vs-gd.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2017-08-13-mf-autograd-adagrad.html b/posts/2017-08-13-mf-autograd-adagrad.html index 29dac0e..83b5881 100644 --- a/posts/2017-08-13-mf-autograd-adagrad.html +++ b/posts/2017-08-13-mf-autograd-adagrad.html @@ -2,7 +2,7 @@ - + @@ -363,10 +363,9 @@

pred_df
-
-

+
@@ -406,7 +405,6 @@

Learning rate evolution for W

diff --git a/posts/2017-12-18-recommend-keras.html b/posts/2017-12-18-recommend-keras.html index 378200e..c202688 100644 --- a/posts/2017-12-18-recommend-keras.html +++ b/posts/2017-12-18-recommend-keras.html @@ -2,7 +2,7 @@ - + @@ -262,10 +262,9 @@

Peak into the datase
dataset.head()
-
-

+
@@ -317,7 +316,6 @@

Peak into the datase -

So, each record (row) shows the rating for a user, item (movie) pair. It should be noted that I use item and movie interchangeably in this post.

len(dataset.user_id.unique()), len(dataset.item_id.unique())
@@ -334,10 +332,9 @@

Peak into the datase
dataset.head()
-
-

+
@@ -389,7 +386,6 @@

Peak into the datase -

Train test split

@@ -402,10 +398,9 @@

Train test split

train.head()
-
-

+
@@ -457,15 +452,13 @@

Train test split

-
test.head()
-
-
+
@@ -517,7 +510,6 @@

Train test split

-

Matrix factorisation

@@ -635,10 +627,9 @@

Extractin pd.DataFrame(movie_embedding_learnt).describe()
-
-

+
@@ -702,16 +693,14 @@

Extractin -
user_embedding_learnt = model.get_layer(name='User-Embedding').get_weights()[0]
 pd.DataFrame(user_embedding_learnt).describe()
-
-

+
@@ -775,7 +764,6 @@

Extractin -

We can see that both the user and the item embeddings have negative elements. There are some applications which require that the learnt embeddings be non-negative. This approach is also called non-negative matrix factorisation, which we’ll workout now.

@@ -804,10 +792,9 @@

pd.DataFrame(movie_embedding_learnt).describe()
-
-

+
@@ -871,7 +858,6 @@

diff --git a/posts/2017-12-29-neural-collaborative-filtering.html b/posts/2017-12-29-neural-collaborative-filtering.html index efe4147..8416909 100644 --- a/posts/2017-12-29-neural-collaborative-filtering.html +++ b/posts/2017-12-29-neural-collaborative-filtering.html @@ -2,7 +2,7 @@ - + @@ -233,10 +233,9 @@

Peak into the datase
dataset.head()
-
-

+
@@ -288,7 +287,6 @@

Peak into the datase -

So, each record (row) shows the rating for a user, item (movie) pair. It should be noted that I use item and movie interchangeably in this post.

len(dataset.user_id.unique()), len(dataset.item_id.unique())
@@ -305,10 +303,9 @@

Peak into the datase
dataset.head()
-
-

+
@@ -360,7 +357,6 @@

Peak into the datase -

Train test split

@@ -373,10 +369,9 @@

Train test split

train.head()
-
-

+
@@ -428,7 +423,6 @@

Train test split

-
test.head()
 y_true = test.rating
diff --git a/posts/2018-01-07-cs-phd-lessons.html b/posts/2018-01-07-cs-phd-lessons.html index 33b7128..54ff553 100644 --- a/posts/2018-01-07-cs-phd-lessons.html +++ b/posts/2018-01-07-cs-phd-lessons.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2018-01-13-denoising.html b/posts/2018-01-13-denoising.html index 02f6ca8..90f86fc 100644 --- a/posts/2018-01-13-denoising.html +++ b/posts/2018-01-13-denoising.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2018-06-16-active-committee.html b/posts/2018-06-16-active-committee.html index 5468750..31d07db 100644 --- a/posts/2018-06-16-active-committee.html +++ b/posts/2018-06-16-active-committee.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2018-06-21-aq-india-map.html b/posts/2018-06-21-aq-india-map.html index 2a202f0..9634280 100644 --- a/posts/2018-06-21-aq-india-map.html +++ b/posts/2018-06-21-aq-india-map.html @@ -2,7 +2,7 @@ - + @@ -236,10 +236,9 @@

df

-
-
+
@@ -328,7 +327,6 @@

Downloading World GeoJson file

diff --git a/posts/2018-06-26-map-electricity-access.html b/posts/2018-06-26-map-electricity-access.html index 80ef942..fffbddc 100644 --- a/posts/2018-06-26-map-electricity-access.html +++ b/posts/2018-06-26-map-electricity-access.html @@ -2,7 +2,7 @@ - + @@ -263,10 +263,9 @@

Extracting shapefile<
gdf.head()
-
-

+
@@ -306,7 +305,6 @@

Extracting shapefile< -
@@ -320,10 +318,9 @@

G
df_2016.head()
-
-

+
@@ -361,7 +358,6 @@

G -

In order to visualise electricity access data over the map, we would have to join the GeoPandas object gdf and df_elec

@@ -379,10 +375,9 @@

Joining gdf<
df_2016.head()
-
-

date
+
@@ -427,7 +422,6 @@

Joining gdf< -

Now, we can join the two data sources

merged_df_2016 = gpd.GeoDataFrame(pd.merge(gdf, df_2016, left_on='ADM0_A3', right_on='Code'))
@@ -436,10 +430,9 @@

Joining gdf<
merged_df_2016.head()
-
-

date
+
@@ -491,7 +484,6 @@

Joining gdf< -

Finally plotting!

diff --git a/posts/2018-08-18-placement-preparation-2018-1-hashmap.html b/posts/2018-08-18-placement-preparation-2018-1-hashmap.html index 4cccf47..cc9041e 100644 --- a/posts/2018-08-18-placement-preparation-2018-1-hashmap.html +++ b/posts/2018-08-18-placement-preparation-2018-1-hashmap.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2019-08-20-gaussian-processes.html b/posts/2019-08-20-gaussian-processes.html index 4a46e0d..d44347a 100644 --- a/posts/2019-08-20-gaussian-processes.html +++ b/posts/2019-08-20-gaussian-processes.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2020-01-14-test-markdown-post.html b/posts/2020-01-14-test-markdown-post.html index 1226415..495b452 100644 --- a/posts/2020-01-14-test-markdown-post.html +++ b/posts/2020-01-14-test-markdown-post.html @@ -2,7 +2,7 @@ - + @@ -258,7 +258,7 @@

Code

Tables

-

+
diff --git a/posts/2020-02-20-bayesian-linear-regression.html b/posts/2020-02-20-bayesian-linear-regression.html index 0354d9a..0881189 100644 --- a/posts/2020-02-20-bayesian-linear-regression.html +++ b/posts/2020-02-20-bayesian-linear-regression.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2020-02-28-xor-relu-vector.html b/posts/2020-02-28-xor-relu-vector.html index f2b3e28..e0336d0 100644 --- a/posts/2020-02-28-xor-relu-vector.html +++ b/posts/2020-02-28-xor-relu-vector.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2020-03-02-linear-scratch.html b/posts/2020-03-02-linear-scratch.html index 1f55e0a..732ec97 100644 --- a/posts/2020-03-02-linear-scratch.html +++ b/posts/2020-03-02-linear-scratch.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2020-03-08-keras-neural-non-linear.html b/posts/2020-03-08-keras-neural-non-linear.html index ae3d35d..829d2d8 100644 --- a/posts/2020-03-08-keras-neural-non-linear.html +++ b/posts/2020-03-08-keras-neural-non-linear.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2020-03-26-gp.html b/posts/2020-03-26-gp.html index b1a5161..557b065 100644 --- a/posts/2020-03-26-gp.html +++ b/posts/2020-03-26-gp.html @@ -2,7 +2,7 @@ - + @@ -270,7 +270,6 @@

Covarianc # Preview the kernel's parameters k
-
-

Column 1
+
@@ -304,7 +303,6 @@

Covarianc

rbf.

-

We can see from the above table that our kernel has two parameters, variance and lengthscale, both with value 1.0. There is also information on the constraints and priors on each parameter, but we will look at this later.

@@ -11572,7 +11570,6 @@

Optimizing ke
m
-
- +
@@ -11627,7 +11624,6 @@

Optimizing ke

GP_regression.
-
m.plot()
 ax = plt.gca()
@@ -11737,7 +11733,6 @@ 

Other kernels

m
-
- +
@@ -11798,7 +11793,6 @@

Other kernels

GP_regression.
-
m.plot()
 ax = plt.gca()
@@ -12049,10 +12043,9 @@ 

Air quality 2d map

df
-
- +
@@ -12164,7 +12157,6 @@

Air quality 2d map

-
import geopandas
 gdf = geopandas.GeoDataFrame(
diff --git a/posts/2020-03-28-active_learning_with_bayesian_linear_regression.html b/posts/2020-03-28-active_learning_with_bayesian_linear_regression.html
index 535c813..7af5b11 100644
--- a/posts/2020-03-28-active_learning_with_bayesian_linear_regression.html
+++ b/posts/2020-03-28-active_learning_with_bayesian_linear_regression.html
@@ -2,7 +2,7 @@
 
 
 
-
+
 
 
 
diff --git a/posts/2020-03-29-param-learning.html b/posts/2020-03-29-param-learning.html
index 053e3b3..b3397b1 100644
--- a/posts/2020-03-29-param-learning.html
+++ b/posts/2020-03-29-param-learning.html
@@ -2,7 +2,7 @@
 
 
 
-
+
 
 
 
@@ -322,7 +322,6 @@ 

Defining GPy’s # Preview the kernel's parameters k

-
-
+
@@ -356,7 +355,6 @@

Defining GPy’s

rbf.
-

Matching our RBF kernel with GPy’s kernel

diff --git a/posts/2020-04-16-inverse-transform.html b/posts/2020-04-16-inverse-transform.html index 68facf7..cef494c 100644 --- a/posts/2020-04-16-inverse-transform.html +++ b/posts/2020-04-16-inverse-transform.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2020-06-26-gp-understand.html b/posts/2020-06-26-gp-understand.html index a0e97cd..509d167 100644 --- a/posts/2020-06-26-gp-understand.html +++ b/posts/2020-06-26-gp-understand.html @@ -2,7 +2,7 @@ - + @@ -266,7 +266,6 @@

Covarianc # Preview the kernel's parameters k

-
- +
@@ -300,7 +299,6 @@

Covarianc

rbf.
-
fig, ax = plt.subplots()
 from matplotlib.animation import FuncAnimation
diff --git a/posts/2021-05-31-gan.html b/posts/2021-05-31-gan.html
index e539ba1..a663528 100644
--- a/posts/2021-05-31-gan.html
+++ b/posts/2021-05-31-gan.html
@@ -2,7 +2,7 @@
 
 
 
-
+
 
 
 
@@ -292,8 +292,7 @@ 

Creating “tru plot_faces(faces, subset=700)

-
- +
@@ -391,7 +390,6 @@

Creating “tru

-

The above shows some samples drawn from the true distibution. Let us also now create some random/noisy samples. These samples do not have any relationship between the 4 positions.

# Examples of noisy images
@@ -402,8 +400,7 @@ 

Creating “tru
plot_faces(noise)
-
- +
@@ -499,7 +496,7 @@

Creating “tru

- +
@@ -595,7 +592,7 @@

Creating “tru

- +
@@ -691,7 +688,7 @@

Creating “tru

- +
@@ -787,7 +784,7 @@

Creating “tru

- +
@@ -885,7 +882,6 @@

Creating “tru

-

Creating the discriminator

@@ -1020,8 +1016,7 @@

Generati
plot_faces(gen_fake(20)[0])
-
- +
@@ -1117,7 +1112,7 @@

Generati

- +
@@ -1213,7 +1208,7 @@

Generati

- +
@@ -1267,7 +1262,6 @@

Generati

-

Sampling from the Real (Train) Dataset

@@ -1281,8 +1275,7 @@

Sampl
plot_faces(gen_real(20)[0])
-
- +
@@ -1378,7 +1371,7 @@

Sampl

- +
@@ -1474,7 +1467,7 @@

Sampl

- +
@@ -1528,7 +1521,6 @@

Sampl

-

We can clearly see the pattern in the images coming from the training dataset.

@@ -1668,8 +1660,7 @@

plot_faces(gen_fake(20)[0])
-
- +
@@ -1765,7 +1756,7 @@

+

@@ -1861,7 +1852,7 @@

+

@@ -1915,7 +1906,6 @@

@@ -1930,8 +1920,7 @@

Visuali
media.show_images(o,  border=True, columns=5, height=80, cmap='Greys')
-
-

+
@@ -1994,7 +1983,7 @@

Visuali

- +
@@ -2057,7 +2046,7 @@

Visuali

- +
@@ -2120,7 +2109,7 @@

Visuali

- +
@@ -2183,7 +2172,7 @@

Visuali

- +
@@ -2246,7 +2235,7 @@

Visuali

- +
@@ -2309,7 +2298,7 @@

Visuali

- +
@@ -2372,7 +2361,7 @@

Visuali

- +
@@ -2435,7 +2424,7 @@

Visuali

- +
@@ -2498,7 +2487,7 @@

Visuali

- +
@@ -2563,7 +2552,6 @@

Visuali

-

We can see above the improvement of the generation over the different iterations and different inputs! That is it for this article. Happing GANning.

diff --git a/posts/2021-06-12-setup-mac.html b/posts/2021-06-12-setup-mac.html index f43f6f6..70743f5 100644 --- a/posts/2021-06-12-setup-mac.html +++ b/posts/2021-06-12-setup-mac.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-06-14-setup-ipad.html b/posts/2021-06-14-setup-ipad.html index bedefc1..4787700 100644 --- a/posts/2021-06-14-setup-ipad.html +++ b/posts/2021-06-14-setup-ipad.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-06-16-shortcuts-ipad.html b/posts/2021-06-16-shortcuts-ipad.html index 85432d7..92d3c35 100644 --- a/posts/2021-06-16-shortcuts-ipad.html +++ b/posts/2021-06-16-shortcuts-ipad.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-06-17-python-ssh.html b/posts/2021-06-17-python-ssh.html index b079d40..2706f33 100644 --- a/posts/2021-06-17-python-ssh.html +++ b/posts/2021-06-17-python-ssh.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-06-18-audio-filters.html b/posts/2021-06-18-audio-filters.html index 6d0481b..c84fa41 100644 --- a/posts/2021-06-18-audio-filters.html +++ b/posts/2021-06-18-audio-filters.html @@ -2,7 +2,7 @@ - + @@ -399,8 +399,7 @@

Introduction

'imovie':imovie}, cmap='magma', columns=4, height=200 )
-
- +
@@ -452,7 +451,7 @@

Introduction

- +
@@ -484,7 +483,6 @@

Introduction

-
!sox test-audacity.wav output.dat
diff --git a/posts/2021-06-19-blur-affinity.html b/posts/2021-06-19-blur-affinity.html index 23118fd..cd5283d 100644 --- a/posts/2021-06-19-blur-affinity.html +++ b/posts/2021-06-19-blur-affinity.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-08-20-bayesian.html b/posts/2021-08-20-bayesian.html index a0f2a20..b7a2b88 100644 --- a/posts/2021-08-20-bayesian.html +++ b/posts/2021-08-20-bayesian.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-09-01-hello-julia-language.html b/posts/2021-09-01-hello-julia-language.html index 5675c72..551eead 100644 --- a/posts/2021-09-01-hello-julia-language.html +++ b/posts/2021-09-01-hello-julia-language.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2021-09-03-param-learning-sgd.html b/posts/2021-09-03-param-learning-sgd.html index f9b7629..6193b49 100644 --- a/posts/2021-09-03-param-learning-sgd.html +++ b/posts/2021-09-03-param-learning-sgd.html @@ -2,7 +2,7 @@ - + @@ -307,7 +307,6 @@

Defining GPy’s # Preview the kernel's parameters k

-
- +
@@ -341,7 +340,6 @@

Defining GPy’s

rbf.
-

Matching our RBF kernel with GPy’s kernel

diff --git a/posts/2022-01-26-tfp-distributions.html b/posts/2022-01-26-tfp-distributions.html index 932505a..d421426 100644 --- a/posts/2022-01-26-tfp-distributions.html +++ b/posts/2022-01-26-tfp-distributions.html @@ -2,7 +2,7 @@ - + @@ -602,10 +602,9 @@

Multivariate Normal
mv_data

-
- +
@@ -676,7 +675,6 @@

Multivariate Normal - diff --git a/posts/2022-01-28-tfp-linear-regression.html b/posts/2022-01-28-tfp-linear-regression.html index e96b49d..0574a32 100644 --- a/posts/2022-01-28-tfp-linear-regression.html +++ b/posts/2022-01-28-tfp-linear-regression.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-01-29-kl-divergence.html b/posts/2022-01-29-kl-divergence.html index dca2945..37b2b5e 100644 --- a/posts/2022-01-29-kl-divergence.html +++ b/posts/2022-01-29-kl-divergence.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-04-sampling-normal.html b/posts/2022-02-04-sampling-normal.html index 048d3b0..965a1f4 100644 --- a/posts/2022-02-04-sampling-normal.html +++ b/posts/2022-02-04-sampling-normal.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-05-lr.html b/posts/2022-02-05-lr.html index e0cecaa..d749e3d 100644 --- a/posts/2022-02-05-lr.html +++ b/posts/2022-02-05-lr.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-05-simple-dgm.html b/posts/2022-02-05-simple-dgm.html index 12d10ba..1d1126c 100644 --- a/posts/2022-02-05-simple-dgm.html +++ b/posts/2022-02-05-simple-dgm.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-07-coin-toss.html b/posts/2022-02-07-coin-toss.html index 0a07669..e8eac06 100644 --- a/posts/2022-02-07-coin-toss.html +++ b/posts/2022-02-07-coin-toss.html @@ -2,7 +2,7 @@ - + @@ -786,10 +786,9 @@

pd.DataFrame(c)
-
-

+
@@ -956,7 +955,6 @@

sns.histplot(tf.reduce_sum(tf.cast(c, tf.float32), axis=1), bins=11)
 sns.despine()
diff --git a/posts/2022-02-09-autograd-pytorch-jax.html b/posts/2022-02-09-autograd-pytorch-jax.html index 5108843..29ae87c 100644 --- a/posts/2022-02-09-autograd-pytorch-jax.html +++ b/posts/2022-02-09-autograd-pytorch-jax.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-09-pytorch-learn-normal.html b/posts/2022-02-09-pytorch-learn-normal.html index 4729514..848aa4b 100644 --- a/posts/2022-02-09-pytorch-learn-normal.html +++ b/posts/2022-02-09-pytorch-learn-normal.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-11-matrix.html b/posts/2022-02-11-matrix.html index 6253ec0..04d60c6 100644 --- a/posts/2022-02-11-matrix.html +++ b/posts/2022-02-11-matrix.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-11-pytorch-learn-normal-map.html b/posts/2022-02-11-pytorch-learn-normal-map.html index b7a07de..0c12471 100644 --- a/posts/2022-02-11-pytorch-learn-normal-map.html +++ b/posts/2022-02-11-pytorch-learn-normal-map.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-12-variational-inference.html b/posts/2022-02-12-variational-inference.html index 16176f5..216cba3 100644 --- a/posts/2022-02-12-variational-inference.html +++ b/posts/2022-02-12-variational-inference.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-14-gmm.html b/posts/2022-02-14-gmm.html index 6ea9e80..9e32a1f 100644 --- a/posts/2022-02-14-gmm.html +++ b/posts/2022-02-14-gmm.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-14-logistic-regression.html b/posts/2022-02-14-logistic-regression.html index 2488c6f..04eaec5 100644 --- a/posts/2022-02-14-logistic-regression.html +++ b/posts/2022-02-14-logistic-regression.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-15-draw-graphical-models.html b/posts/2022-02-15-draw-graphical-models.html index ef22fdf..05912b2 100644 --- a/posts/2022-02-15-draw-graphical-models.html +++ b/posts/2022-02-15-draw-graphical-models.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-17-ppca.html b/posts/2022-02-17-ppca.html index ce1e08d..79cf4f8 100644 --- a/posts/2022-02-17-ppca.html +++ b/posts/2022-02-17-ppca.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-17-pyro-linreg.html b/posts/2022-02-17-pyro-linreg.html index 04cd69e..0f0277e 100644 --- a/posts/2022-02-17-pyro-linreg.html +++ b/posts/2022-02-17-pyro-linreg.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-20-condition-pyro.html b/posts/2022-02-20-condition-pyro.html index a46a699..af682b1 100644 --- a/posts/2022-02-20-condition-pyro.html +++ b/posts/2022-02-20-condition-pyro.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-21-coordinate-descent-failure.html b/posts/2022-02-21-coordinate-descent-failure.html index 2e40342..dd1bd75 100644 --- a/posts/2022-02-21-coordinate-descent-failure.html +++ b/posts/2022-02-21-coordinate-descent-failure.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-02-24-audio-filtering.html b/posts/2022-02-24-audio-filtering.html index d11be53..ca815c9 100644 --- a/posts/2022-02-24-audio-filtering.html +++ b/posts/2022-02-24-audio-filtering.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-10-25-mogp.html b/posts/2022-10-25-mogp.html index c8a41fd..7c5cca3 100644 --- a/posts/2022-10-25-mogp.html +++ b/posts/2022-10-25-mogp.html @@ -2,7 +2,7 @@ - + @@ -253,7 +253,6 @@

ICM

kernel
-
-
+
@@ -287,7 +286,6 @@

ICM

rbf.
-
def jitter(C,  j = 1e-6):
     return C + np.eye(len(C))*j
diff --git a/posts/2022-10-27-calibration.html b/posts/2022-10-27-calibration.html index f323574..7bb5a56 100644 --- a/posts/2022-10-27-calibration.html +++ b/posts/2022-10-27-calibration.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2022-11-20-binomial-poisson-distribution.html b/posts/2022-11-20-binomial-poisson-distribution.html index 039000f..dbe71ba 100644 --- a/posts/2022-11-20-binomial-poisson-distribution.html +++ b/posts/2022-11-20-binomial-poisson-distribution.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2023-01-19-conformal-intro.html b/posts/2023-01-19-conformal-intro.html index e339be7..5c85b21 100644 --- a/posts/2023-01-19-conformal-intro.html +++ b/posts/2023-01-19-conformal-intro.html @@ -2,7 +2,7 @@ - + @@ -234,10 +234,9 @@

Conformal Prediction

prob_df.head()
-
- +
@@ -283,7 +282,6 @@

Conformal Prediction

-
pd.Series(prob_df.values[np.arange(400), y_valid]).quantile(0.1)
@@ -319,10 +317,9 @@

Conformal Prediction

# Get the predicted probability for the correct class for each sample
-
-
+
@@ -621,7 +618,6 @@

Conformal Prediction

- diff --git a/posts/2024-06-10-shortcuts-mac.html b/posts/2024-06-10-shortcuts-mac.html new file mode 100644 index 0000000..2e6b0dd --- /dev/null +++ b/posts/2024-06-10-shortcuts-mac.html @@ -0,0 +1,779 @@ + + + + + + + + + + + +Nipun Batra Blog - Keyboard shortcuts on Mac + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+
+

Keyboard shortcuts on Mac

+
+
+ Keyboard shortcuts on mac +
+
+
+
setup
+
+
+
+ + +
+ + +
+
Published
+
+

June 12, 2024

+
+
+ + +
+ + +
+ + + + +
+ + + + + +
+

Safari Tab Shortcuts

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ActionShortcut
Go to first tabCommand (⌘) + 1
Go to second tabCommand (⌘) + 2
Go to third tabCommand (⌘) + 3
Go to fourth tabCommand (⌘) + 4
Go to fifth tabCommand (⌘) + 5
Go to sixth tabCommand (⌘) + 6
Go to seventh tabCommand (⌘) + 7
Go to eighth tabCommand (⌘) + 8
Go to last tabCommand (⌘) + 9
Move to the next tabControl (⌃) + Tab
Move to the previous tabControl (⌃) + Shift (⇧) + Tab
+ + + + +
+ + + + + + + \ No newline at end of file diff --git a/posts/2024-attention.html b/posts/2024-attention.html index ff5ae93..5ac3876 100644 --- a/posts/2024-attention.html +++ b/posts/2024-attention.html @@ -2,7 +2,7 @@ - + @@ -216,10 +216,9 @@

Attention

pd.read_csv('names-indian.txt', header=None)
-
- +
@@ -278,7 +277,6 @@

Attention

-
# convert all names to lowercase
 names = pd.read_csv('names-indian.txt', header=None)[0].str.lower().values
@@ -544,10 +542,9 @@

Attention

df
-
-
+
@@ -700,7 +697,6 @@

Attention

-
df.sum(axis=1)
diff --git a/posts/2024-forecast.html b/posts/2024-forecast.html index 3e291bc..26ad133 100644 --- a/posts/2024-forecast.html +++ b/posts/2024-forecast.html @@ -2,7 +2,7 @@ - + diff --git a/posts/2024-rnn.html b/posts/2024-rnn.html index a4e85a8..5cc6d80 100644 --- a/posts/2024-rnn.html +++ b/posts/2024-rnn.html @@ -2,7 +2,7 @@ - + @@ -216,10 +216,9 @@

RNN

pd.read_csv('names-indian.txt', header=None)
-
-
+
@@ -278,7 +277,6 @@

RNN

-
# convert all names to lowercase
 names = pd.read_csv('names-indian.txt', header=None)[0].str.lower().values
diff --git a/posts/2024-sample-distribution.html b/posts/2024-sample-distribution.html index e4bfda7..a66234c 100644 --- a/posts/2024-sample-distribution.html +++ b/posts/2024-sample-distribution.html @@ -2,7 +2,7 @@ - + diff --git a/posts/attention-sequence.html b/posts/attention-sequence.html index 906ec37..b57ffef 100644 --- a/posts/attention-sequence.html +++ b/posts/attention-sequence.html @@ -2,7 +2,7 @@ - + diff --git a/posts/auto-pytorch.html b/posts/auto-pytorch.html index 68e184b..d91a948 100644 --- a/posts/auto-pytorch.html +++ b/posts/auto-pytorch.html @@ -2,7 +2,7 @@ - + @@ -242,10 +242,9 @@

On this page

X_train.head()
-
-
+
@@ -333,7 +332,6 @@

On this page

-
y_train.head()
@@ -362,10 +360,9 @@

On this page

pred_df.head()
-
-
+
@@ -405,7 +402,6 @@

On this page

-
# Use validation dataset to find best hyperparameters for RF
 rf = RandomForestRegressor(random_state=1)
@@ -1006,11 +1002,10 @@ 

On this page

progress_reporter=reporter)
-

Trial Progress

-
+
@@ -2154,7 +2149,6 @@

Trial Progress

} -
2023-02-28 10:09:11,027 INFO tune.py:762 -- Total run time: 26.09 seconds (25.84 seconds for the tuning loop).
@@ -2170,10 +2164,9 @@

Trial Progress

df[["loss", "config/l1", "config/l2", "config/l3", "config/lr", "config/l2_reg"]]
-
-
Trial name
+
@@ -2651,7 +2644,6 @@

Trial Progress

-
# Print the best hyperparameters
 
@@ -2702,10 +2694,9 @@ 

Trial Progress

pd.DataFrame({"y_test": y_test, "y_pred": y_pred.reshape(-1)})
-
-
+
@@ -2776,7 +2767,6 @@

Trial Progress

-
# Thus far it seems even with hyperparameter tuning we are unable to match the performance of ensemble models. 
 
@@ -2785,10 +2775,9 @@ 

Trial Progress

df[['loss', 'config/l1', 'config/l2', 'config/l3', 'config/lr', 'config/l2_reg']].head(5)
-
-
+
@@ -2861,7 +2850,6 @@

Trial Progress

-
config_list_of_dicts = df[['config/l1', 'config/l2', 'config/l3', 'config/lr', 'config/l2_reg']].head(5).to_dict('records')
 
@@ -3018,10 +3006,9 @@ 

### Decoder outputs

pd.DataFrame(context_input)
-
-
+
@@ -3115,7 +3102,6 @@

### Decoder outputs

-
encoder(context_input).shape
@@ -3360,10 +3346,9 @@

### Decoder outputs

pd.DataFrame({"y_pred": y_pred.reshape(-1), "y_query": y_query})
-
-
+
@@ -3434,7 +3419,6 @@

### Decoder outputs

- diff --git a/posts/autoencoder.html b/posts/autoencoder.html index 66ca517..7823eaf 100644 --- a/posts/autoencoder.html +++ b/posts/autoencoder.html @@ -2,7 +2,7 @@ - + @@ -1026,10 +1026,9 @@

Reconstruction

err_df.groupby("label").mean()
-
-
+
@@ -1087,7 +1086,6 @@

Reconstruction

-
err_df = pd.DataFrame({"error": errs, "label": y})
@@ -1095,10 +1093,9 @@

Reconstruction

err_df.groupby("label").mean()
-
-
+
@@ -1156,7 +1153,6 @@

Reconstruction

-

Convoluational AE

diff --git a/posts/bald.html b/posts/bald.html index 9fd17e4..6898950 100644 --- a/posts/bald.html +++ b/posts/bald.html @@ -2,7 +2,7 @@ - + diff --git a/posts/comparing-gp.html b/posts/comparing-gp.html index 1b1d744..b07a004 100644 --- a/posts/comparing-gp.html +++ b/posts/comparing-gp.html @@ -2,7 +2,7 @@ - + diff --git a/posts/fsgm.html b/posts/fsgm.html index a0409c0..feac72b 100644 --- a/posts/fsgm.html +++ b/posts/fsgm.html @@ -2,7 +2,7 @@ - + diff --git a/posts/logo.html b/posts/logo.html index c23c81b..4d38fc6 100644 --- a/posts/logo.html +++ b/posts/logo.html @@ -2,7 +2,7 @@ - + diff --git a/posts/moe.html b/posts/moe.html index c4d2549..38b4f74 100644 --- a/posts/moe.html +++ b/posts/moe.html @@ -2,7 +2,7 @@ - + diff --git a/posts/mv-taylor.html b/posts/mv-taylor.html index 7ab1dd9..3271e22 100644 --- a/posts/mv-taylor.html +++ b/posts/mv-taylor.html @@ -2,7 +2,7 @@ - + diff --git a/posts/mvn-nn.html b/posts/mvn-nn.html index 8f1afa7..9ceb2f3 100644 --- a/posts/mvn-nn.html +++ b/posts/mvn-nn.html @@ -2,7 +2,7 @@ - + diff --git a/posts/networkx-trees.html b/posts/networkx-trees.html index 2b5ddb9..02aac7b 100644 --- a/posts/networkx-trees.html +++ b/posts/networkx-trees.html @@ -2,7 +2,7 @@ - + diff --git a/posts/np.html b/posts/np.html index fc59a72..0f4a765 100644 --- a/posts/np.html +++ b/posts/np.html @@ -2,7 +2,7 @@ - + diff --git a/posts/object-detection.html b/posts/object-detection.html index 5e007f6..7bf2aa7 100644 --- a/posts/object-detection.html +++ b/posts/object-detection.html @@ -2,7 +2,7 @@ - + diff --git a/posts/pinn.html b/posts/pinn.html index c3174ff..155c88a 100644 --- a/posts/pinn.html +++ b/posts/pinn.html @@ -2,7 +2,7 @@ - + diff --git a/posts/positional-encoding.html b/posts/positional-encoding.html index ded0487..5633ee4 100644 --- a/posts/positional-encoding.html +++ b/posts/positional-encoding.html @@ -2,7 +2,7 @@ - + diff --git a/posts/residual-torch.html b/posts/residual-torch.html index 57ffa92..d0a649d 100644 --- a/posts/residual-torch.html +++ b/posts/residual-torch.html @@ -2,7 +2,7 @@ - + diff --git a/posts/rl.html b/posts/rl.html index 4e6fb60..a2c8d7f 100644 --- a/posts/rl.html +++ b/posts/rl.html @@ -2,7 +2,7 @@ - + diff --git a/posts/siren-paper-impl.html b/posts/siren-paper-impl.html index 1015904..b4bf774 100644 --- a/posts/siren-paper-impl.html +++ b/posts/siren-paper-impl.html @@ -2,7 +2,7 @@ - + @@ -937,10 +937,9 @@

Animatio
df.describe()
-
-

+
@@ -1004,7 +1003,6 @@

Animatio -
audio.shape, pred_y_8_512.shape
diff --git a/posts/siren-paper.html b/posts/siren-paper.html index 35b9e00..a333b07 100644 --- a/posts/siren-paper.html +++ b/posts/siren-paper.html @@ -2,7 +2,7 @@ - + diff --git a/posts/sr.html b/posts/sr.html index 3f97be4..c00162e 100644 --- a/posts/sr.html +++ b/posts/sr.html @@ -2,7 +2,7 @@ - + diff --git a/posts/stacking.html b/posts/stacking.html index 2188bd4..1d27ef2 100644 --- a/posts/stacking.html +++ b/posts/stacking.html @@ -2,7 +2,7 @@ - + diff --git a/posts/stakcing.html b/posts/stakcing.html index 7bfbd4f..a579167 100644 --- a/posts/stakcing.html +++ b/posts/stakcing.html @@ -2,7 +2,7 @@ - + diff --git a/posts/strassen.html b/posts/strassen.html index 8076cdc..afbd048 100644 --- a/posts/strassen.html +++ b/posts/strassen.html @@ -2,7 +2,7 @@ - + diff --git a/posts/svd.html b/posts/svd.html index 92bd672..bae0cff 100644 --- a/posts/svd.html +++ b/posts/svd.html @@ -2,7 +2,7 @@ - + diff --git a/posts/sympy.html b/posts/sympy.html index 558ec8b..1a120df 100644 --- a/posts/sympy.html +++ b/posts/sympy.html @@ -2,7 +2,7 @@ - + diff --git a/posts/tensorboard.html b/posts/tensorboard.html index 08541a5..11779ff 100644 --- a/posts/tensorboard.html +++ b/posts/tensorboard.html @@ -2,7 +2,7 @@ - + diff --git a/posts/torch-likelihoods.html b/posts/torch-likelihoods.html index 03922c4..02623fa 100644 --- a/posts/torch-likelihoods.html +++ b/posts/torch-likelihoods.html @@ -2,7 +2,7 @@ - + diff --git a/posts/towards-transformers.html b/posts/towards-transformers.html index a3eebb4..3f257fe 100644 --- a/posts/towards-transformers.html +++ b/posts/towards-transformers.html @@ -2,7 +2,7 @@ - + @@ -224,10 +224,9 @@

Basic Imports

pd.DataFrame(pd.Series(ser).values.reshape(n,n))
-
-

+
@@ -720,7 +719,6 @@

Basic Imports

- diff --git a/posts/transcript.html b/posts/transcript.html index a28a418..12ced01 100644 --- a/posts/transcript.html +++ b/posts/transcript.html @@ -2,7 +2,7 @@ - + diff --git a/posts/vscode-tips/index.html b/posts/vscode-tips/index.html index 3e25e8a..1ac26a0 100644 --- a/posts/vscode-tips/index.html +++ b/posts/vscode-tips/index.html @@ -2,7 +2,7 @@ - + @@ -149,7 +149,7 @@

VSCode Settings and Tips

Most used keyboard shortcuts

-
+
diff --git a/posts/welcome/index.html b/posts/welcome/index.html index 1d0da3c..51fbe59 100644 --- a/posts/welcome/index.html +++ b/posts/welcome/index.html @@ -2,7 +2,7 @@ - + diff --git a/search.json b/search.json index 9d997f6..e166d6b 100644 --- a/search.json +++ b/search.json @@ -11,7 +11,7 @@ "href": "posts/2017-06-14-widgets-matplotlib.html", "title": "Data exploration using widgets in Matplotlib", "section": "", - "text": "Imagine that you have to do data cleaning on 10s or 100s of sample points (akin to a row in a 2d matrix). For the purposes of data cleaning, you’d also need to zoom/pan at the data correpsonding to each sample point. Would you create 100s of static plots? We lose the zoom/pan ability there. How about we write a simple function and manually change the argument to reflect the sample #.\nIn this post, I’ll be looking at a simple Matplotlib widget to sift through the samples and retain the ability to pan and zoom. This post is heavily inspired by Jake Vanderplas’ PyData 2013 Matplotlib tutorial. I would be creating 15 timeseries having recorded daily for an year for illustration purposes.\n\nSetting the backend to TK.\nFor some reasons, it works better than the default OSX one.\n\n%matplotlib tk\n\n\n\nCustomary imports\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport sys\n\n\n\nCreating the data\n\n# Fixing the seed for reproducibility\nnp.random.seed(0)\ndf = pd.DataFrame(np.random.randn(365, 15), index=pd.DatetimeIndex(start='2017',freq='D', periods=365))\n\n\ndf.head()[range(5)]\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n\n\n\n\n2017-01-01\n1.764052\n0.400157\n0.978738\n2.240893\n1.867558\n\n\n2017-01-02\n0.333674\n1.494079\n-0.205158\n0.313068\n-0.854096\n\n\n2017-01-03\n0.154947\n0.378163\n-0.887786\n-1.980796\n-0.347912\n\n\n2017-01-04\n-0.438074\n-1.252795\n0.777490\n-1.613898\n-0.212740\n\n\n2017-01-05\n-0.672460\n-0.359553\n-0.813146\n-1.726283\n0.177426\n\n\n\n\n\n\n\n\n\nfig, ax = plt.subplots()\ndf.plot(ax=ax)\n\nNotice, that since I used %matplotlib TK backend, I don’t see the plot embedded in the notebook. Thus I’ll save the current figure as an image and then link it here.\n\nplt.savefig(\"all_data.png\")\n\n\nThis sure does not look pretty.\n\n\nProposed solution\n\nGreat. It seems to do the intended job. Let us now look at the individual pieces and how we can tie them up.\n\n\nCreating the initial frame\nIn the first frame we would like to plot the data for the first sample.\nfig, ax = plt.subplots()\ndf[0].plot(ax=ax, title=\"Sample number: 0\")\n\n\nCreating the buttons at the bottom\nFirst, we’d want to make space for the button at the bottom and place them there. We can do this as follows:\nfrom matplotlib.widgets import Button\n\nfig.subplots_adjust(bottom=0.2)\n\naxprev = plt.axes([0.7, 0.05, 0.1, 0.075])\naxnext = plt.axes([0.81, 0.05, 0.1, 0.075])\n\nbnext = Button(axnext, '>')\nbprev = Button(axprev, '<')\n\n\nLinking the buttons to functions\nWe’d next want to call some function each time either of the two buttons are pressed. We would also need a notion of currently selected data point. The idea would be that each time, > is pressed, we advance the currently selected point and plot correspondingly.\nWe’d have to define next() and prev() where we increment and decrement the selected data point.\n\nclass Index:\n data = df\n selected = 0\n \n def next(self, event):\n self.selected += 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\n\n def prev(self, event):\n self.selected -= 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\nHere, ax.cla() clears the data for the current data point before drawing for the next one. df[self.selected].plot(ax=ax) plots for the newly selected data. ax.set_title(\"Sample number: %d\" %self.selected) would change the title to reflect the currently used data point.\nWe can link to callback as follows:\ncallback = Index()\n\nbnext.on_clicked(callback.next)\nbprev.on_clicked(callback.prev)\n\n\nEnsuring we do not select data point out of range\nIf you notice, we simply incremented and decremented the selected data point without considering going beyond (0, number of data points). So, we need to change the call back functions to check that we do not go beyond the range. This would require the following changes to next() with the changes to prev() being similar.\ndata_min = 0\ndata_max = data.shape[1]-1\nselected = 0\ndef next(self, event):\n if self.selected >=self.data_max:\n self.selected = self.data_max\n ax.set_title('Last sample reached. Cannot go forwards')\n else:\n self.selected += 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\nThere you go. This was fairly simple and fun to do, and yet can be very helpful!\n\n\nComplete code\n\nfrom matplotlib.widgets import Button\n\nfig, ax = plt.subplots()\nfig.subplots_adjust(bottom=0.2)\n\ndf[0].plot(ax=ax, title=\"Sample number: 0\")\n\nclass Index:\n data = df\n data_min = 0\n data_max = data.shape[1]-1\n selected = 0\n def next(self, event):\n if self.selected >=self.data_max:\n self.selected = self.data_max\n ax.set_title('Last sample reached. Cannot go forwards')\n else:\n self.selected += 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\n\n def prev(self, event):\n if self.selected <=self.data_min:\n self.selected = 0\n ax.set_title('First sample reached. Cannot go backwards')\n else:\n self.selected -= 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\n \n\ncallback = Index()\naxprev = plt.axes([0.7, 0.05, 0.1, 0.075])\naxnext = plt.axes([0.81, 0.05, 0.1, 0.075])\n\nbnext = Button(axnext, '>')\nbnext.on_clicked(callback.next)\n\nbprev = Button(axprev, '<')\nbprev.on_clicked(callback.prev)\n\n0\n\n\n\n\nAdvanced example\nHere is another slightly more advanced wideget use case in action.\n\nI will just put the code up here and leave the understanding upto the reader as an exercise.\n\nwith pd.HDFStore('temp-store.h5', mode='w') as st:\n\n # 15 home-> 2 columns, 365 rows (daily one reading)\n for home in range(15):\n df = pd.DataFrame(np.random.randn(365, 2), columns=['fridge','microwave'],\n index=pd.DatetimeIndex(start='2017',freq='D', periods=365))\n df = df.abs()\n st['/home_%d' %home] = df\n\n\nst = pd.HDFStore('temp-store.h5', mode='r')\n\n\nfrom matplotlib.widgets import Button, CheckButtons\n\nfig, ax = plt.subplots()\nfig.subplots_adjust(bottom=0.2)\nfig.subplots_adjust(left=0.2)\n\nhome_0 = st['/home_0']\n\nrax = plt.axes([0.02, 0.4, 0.13, 0.2], aspect='equal')\n\nlabels = tuple(home_0.columns)\nstates = tuple([True]*len(labels))\ncheck = CheckButtons(rax, labels, states)\n\n\nst['/home_0'].plot(ax=ax, title=\"Sample number: 0\").legend(loc=2)\nlines = ax.get_lines()\n\nclass Index:\n store = st\n data_min = 0\n data_max = len(store.keys())-1\n selected = 0\n st, la = states, labels\n states_dict = dict(zip(la, st))\n def selected_column(self, label):\n self.states_dict[label] = not self.states_dict[label]\n self.plot()\n \n def plot(self):\n ax.cla()\n st['/home_%d' %self.selected].plot(ax=ax, title=\"Sample number: %d\" %self.selected).legend(loc=2)\n lines = ax.get_lines()\n for i ,(l, s) in enumerate(self.states_dict.items()):\n lines[i].set_visible(s)\n plt.legend(loc=1)\n \n \n def next(self, event):\n if self.selected >=self.data_max:\n self.selected = self.data_max\n ax.set_title('Last sample reached. Cannot go forwards')\n else:\n self.selected += 1\n self.plot()\n \n\n def prev(self, event):\n if self.selected <=self.data_min:\n self.selected = 0\n ax.set_title('First sample reached. Cannot go backwards')\n else:\n self.selected -= 1\n self.plot()\n \n\ncallback = Index()\naxprev = plt.axes([0.7, 0.05, 0.1, 0.075])\naxnext = plt.axes([0.81, 0.05, 0.1, 0.075])\n\nbnext = Button(axnext, '>')\nbnext.on_clicked(callback.next)\n\nbprev = Button(axprev, '<')\nbprev.on_clicked(callback.prev)\n\ncheck.on_clicked(callback.selected_column);" + "text": "Imagine that you have to do data cleaning on 10s or 100s of sample points (akin to a row in a 2d matrix). For the purposes of data cleaning, you’d also need to zoom/pan at the data correpsonding to each sample point. Would you create 100s of static plots? We lose the zoom/pan ability there. How about we write a simple function and manually change the argument to reflect the sample #.\nIn this post, I’ll be looking at a simple Matplotlib widget to sift through the samples and retain the ability to pan and zoom. This post is heavily inspired by Jake Vanderplas’ PyData 2013 Matplotlib tutorial. I would be creating 15 timeseries having recorded daily for an year for illustration purposes.\n\nSetting the backend to TK.\nFor some reasons, it works better than the default OSX one.\n\n%matplotlib tk\n\n\n\nCustomary imports\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport sys\n\n\n\nCreating the data\n\n# Fixing the seed for reproducibility\nnp.random.seed(0)\ndf = pd.DataFrame(np.random.randn(365, 15), index=pd.DatetimeIndex(start='2017',freq='D', periods=365))\n\n\ndf.head()[range(5)]\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n\n\n\n\n2017-01-01\n1.764052\n0.400157\n0.978738\n2.240893\n1.867558\n\n\n2017-01-02\n0.333674\n1.494079\n-0.205158\n0.313068\n-0.854096\n\n\n2017-01-03\n0.154947\n0.378163\n-0.887786\n-1.980796\n-0.347912\n\n\n2017-01-04\n-0.438074\n-1.252795\n0.777490\n-1.613898\n-0.212740\n\n\n2017-01-05\n-0.672460\n-0.359553\n-0.813146\n-1.726283\n0.177426\n\n\n\n\n\n\n\n\nfig, ax = plt.subplots()\ndf.plot(ax=ax)\n\nNotice, that since I used %matplotlib TK backend, I don’t see the plot embedded in the notebook. Thus I’ll save the current figure as an image and then link it here.\n\nplt.savefig(\"all_data.png\")\n\n\nThis sure does not look pretty.\n\n\nProposed solution\n\nGreat. It seems to do the intended job. Let us now look at the individual pieces and how we can tie them up.\n\n\nCreating the initial frame\nIn the first frame we would like to plot the data for the first sample.\nfig, ax = plt.subplots()\ndf[0].plot(ax=ax, title=\"Sample number: 0\")\n\n\nCreating the buttons at the bottom\nFirst, we’d want to make space for the button at the bottom and place them there. We can do this as follows:\nfrom matplotlib.widgets import Button\n\nfig.subplots_adjust(bottom=0.2)\n\naxprev = plt.axes([0.7, 0.05, 0.1, 0.075])\naxnext = plt.axes([0.81, 0.05, 0.1, 0.075])\n\nbnext = Button(axnext, '>')\nbprev = Button(axprev, '<')\n\n\nLinking the buttons to functions\nWe’d next want to call some function each time either of the two buttons are pressed. We would also need a notion of currently selected data point. The idea would be that each time, > is pressed, we advance the currently selected point and plot correspondingly.\nWe’d have to define next() and prev() where we increment and decrement the selected data point.\n\nclass Index:\n data = df\n selected = 0\n \n def next(self, event):\n self.selected += 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\n\n def prev(self, event):\n self.selected -= 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\nHere, ax.cla() clears the data for the current data point before drawing for the next one. df[self.selected].plot(ax=ax) plots for the newly selected data. ax.set_title(\"Sample number: %d\" %self.selected) would change the title to reflect the currently used data point.\nWe can link to callback as follows:\ncallback = Index()\n\nbnext.on_clicked(callback.next)\nbprev.on_clicked(callback.prev)\n\n\nEnsuring we do not select data point out of range\nIf you notice, we simply incremented and decremented the selected data point without considering going beyond (0, number of data points). So, we need to change the call back functions to check that we do not go beyond the range. This would require the following changes to next() with the changes to prev() being similar.\ndata_min = 0\ndata_max = data.shape[1]-1\nselected = 0\ndef next(self, event):\n if self.selected >=self.data_max:\n self.selected = self.data_max\n ax.set_title('Last sample reached. Cannot go forwards')\n else:\n self.selected += 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\nThere you go. This was fairly simple and fun to do, and yet can be very helpful!\n\n\nComplete code\n\nfrom matplotlib.widgets import Button\n\nfig, ax = plt.subplots()\nfig.subplots_adjust(bottom=0.2)\n\ndf[0].plot(ax=ax, title=\"Sample number: 0\")\n\nclass Index:\n data = df\n data_min = 0\n data_max = data.shape[1]-1\n selected = 0\n def next(self, event):\n if self.selected >=self.data_max:\n self.selected = self.data_max\n ax.set_title('Last sample reached. Cannot go forwards')\n else:\n self.selected += 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\n\n def prev(self, event):\n if self.selected <=self.data_min:\n self.selected = 0\n ax.set_title('First sample reached. Cannot go backwards')\n else:\n self.selected -= 1\n ax.cla()\n df[self.selected].plot(ax=ax)\n ax.set_title(\"Sample number: %d\" %self.selected)\n \n\ncallback = Index()\naxprev = plt.axes([0.7, 0.05, 0.1, 0.075])\naxnext = plt.axes([0.81, 0.05, 0.1, 0.075])\n\nbnext = Button(axnext, '>')\nbnext.on_clicked(callback.next)\n\nbprev = Button(axprev, '<')\nbprev.on_clicked(callback.prev)\n\n0\n\n\n\n\nAdvanced example\nHere is another slightly more advanced wideget use case in action.\n\nI will just put the code up here and leave the understanding upto the reader as an exercise.\n\nwith pd.HDFStore('temp-store.h5', mode='w') as st:\n\n # 15 home-> 2 columns, 365 rows (daily one reading)\n for home in range(15):\n df = pd.DataFrame(np.random.randn(365, 2), columns=['fridge','microwave'],\n index=pd.DatetimeIndex(start='2017',freq='D', periods=365))\n df = df.abs()\n st['/home_%d' %home] = df\n\n\nst = pd.HDFStore('temp-store.h5', mode='r')\n\n\nfrom matplotlib.widgets import Button, CheckButtons\n\nfig, ax = plt.subplots()\nfig.subplots_adjust(bottom=0.2)\nfig.subplots_adjust(left=0.2)\n\nhome_0 = st['/home_0']\n\nrax = plt.axes([0.02, 0.4, 0.13, 0.2], aspect='equal')\n\nlabels = tuple(home_0.columns)\nstates = tuple([True]*len(labels))\ncheck = CheckButtons(rax, labels, states)\n\n\nst['/home_0'].plot(ax=ax, title=\"Sample number: 0\").legend(loc=2)\nlines = ax.get_lines()\n\nclass Index:\n store = st\n data_min = 0\n data_max = len(store.keys())-1\n selected = 0\n st, la = states, labels\n states_dict = dict(zip(la, st))\n def selected_column(self, label):\n self.states_dict[label] = not self.states_dict[label]\n self.plot()\n \n def plot(self):\n ax.cla()\n st['/home_%d' %self.selected].plot(ax=ax, title=\"Sample number: %d\" %self.selected).legend(loc=2)\n lines = ax.get_lines()\n for i ,(l, s) in enumerate(self.states_dict.items()):\n lines[i].set_visible(s)\n plt.legend(loc=1)\n \n \n def next(self, event):\n if self.selected >=self.data_max:\n self.selected = self.data_max\n ax.set_title('Last sample reached. Cannot go forwards')\n else:\n self.selected += 1\n self.plot()\n \n\n def prev(self, event):\n if self.selected <=self.data_min:\n self.selected = 0\n ax.set_title('First sample reached. Cannot go backwards')\n else:\n self.selected -= 1\n self.plot()\n \n\ncallback = Index()\naxprev = plt.axes([0.7, 0.05, 0.1, 0.075])\naxnext = plt.axes([0.81, 0.05, 0.1, 0.075])\n\nbnext = Button(axnext, '>')\nbnext.on_clicked(callback.next)\n\nbprev = Button(axprev, '<')\nbprev.on_clicked(callback.prev)\n\ncheck.on_clicked(callback.selected_column);" }, { "objectID": "posts/2014-05-01-gibbs-sampling.html", @@ -32,21 +32,21 @@ "href": "posts/2017-12-29-neural-collaborative-filtering.html", "title": "Neural Networks for Collaborative Filtering", "section": "", - "text": "Recently, I had a chance to read an interesting WWW 2017 paper entitled: Neural Collaborative Filtering. The first paragraph of the abstract reads as follows:\n\nIn recent years, deep neural networks have yielded immense success on speech recognition, computer vision and natural language processing. However, the exploration of deep neural networks on recommender systems has received relatively less scrutiny. In this work, we strive to develop techniques based on neural networks to tackle the key problem in recommendation — collaborative filtering — on the basis of implicit feedback.\n\nI’d recently written a blog post on using Keras (deep learning library) for implementing traditional matrix factorization based collaborative filtering. So, I thought to get my hands dirty with building a prototype for the paper mentioned above. The authors have already provided their code on Github, which should serve as a reference for the paper and not my post, whose purpose is merely educational!\nHere’s how the proposed network architecture looks in the paper:\n\nThere are a few terms that we need to understand:\n\nUser (u) and Item (i) are used to create embeddings (low-dimensional) for user and item\nGeneralized Matrix Factorisation (GMF) combines the two embeddings using the dot product. This is our regular matrix factorisation.\nMulti-layer perceptron can also create embeddings for user and items. However, instead of taking a dot product of these to obtain the rating, we can concatenate them to create a feature vector which can be passed on to the further layers.\nNeural MF can then combine the predictions from MLP and GMF to obtain the following prediction.\n\nAs done in my previous post, I’ll use the MovieLens-100k dataset for illustration. Please refer to my previous post for more details.\n\nPeak into the dataset\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport warnings\nwarnings.filterwarnings('ignore')\n\n%matplotlib inline\n\n\ndataset = pd.read_csv(\"/Users/nipun/Downloads/ml-100k/u.data\",sep='\\t',names=\"user_id,item_id,rating,timestamp\".split(\",\"))\n\n\ndataset.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n196\n242\n3\n881250949\n\n\n1\n186\n302\n3\n891717742\n\n\n2\n22\n377\n1\n878887116\n\n\n3\n244\n51\n2\n880606923\n\n\n4\n166\n346\n1\n886397596\n\n\n\n\n\n\n\n\nSo, each record (row) shows the rating for a user, item (movie) pair. It should be noted that I use item and movie interchangeably in this post.\n\nlen(dataset.user_id.unique()), len(dataset.item_id.unique())\n\n(943, 1682)\n\n\nWe assign a unique number between (0, #users) to each user and do the same for movies.\n\ndataset.user_id = dataset.user_id.astype('category').cat.codes.values\ndataset.item_id = dataset.item_id.astype('category').cat.codes.values\n\n\ndataset.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n195\n241\n3\n881250949\n\n\n1\n185\n301\n3\n891717742\n\n\n2\n21\n376\n1\n878887116\n\n\n3\n243\n50\n2\n880606923\n\n\n4\n165\n345\n1\n886397596\n\n\n\n\n\n\n\n\n\n\nTrain test split\nWe’ll now split our dataset of 100k ratings into train (containing 80k ratings) and test (containing 20k ratings). Given the train set, we’d like to accurately estimate the ratings in the test set.\n\nfrom sklearn.model_selection import train_test_split\ntrain, test = train_test_split(dataset, test_size=0.2)\n\n\ntrain.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n13185\n71\n95\n5\n880037203\n\n\n23391\n144\n509\n4\n882181859\n\n\n90744\n895\n50\n2\n887159951\n\n\n3043\n255\n279\n5\n882151167\n\n\n8932\n55\n94\n4\n892683274\n\n\n\n\n\n\n\n\n\ntest.head()\ny_true = test.rating\n\n\n\nCreating the model\n\nimport keras\nn_latent_factors_user = 8\nn_latent_factors_movie = 10\nn_latent_factors_mf = 3\nn_users, n_movies = len(dataset.user_id.unique()), len(dataset.item_id.unique())\n\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding_mlp = keras.layers.Embedding(n_movies + 1, n_latent_factors_movie, name='Movie-Embedding-MLP')(movie_input)\nmovie_vec_mlp = keras.layers.Flatten(name='FlattenMovies-MLP')(movie_embedding_mlp)\nmovie_vec_mlp = keras.layers.Dropout(0.2)(movie_vec_mlp)\n\nmovie_embedding_mf = keras.layers.Embedding(n_movies + 1, n_latent_factors_mf, name='Movie-Embedding-MF')(movie_input)\nmovie_vec_mf = keras.layers.Flatten(name='FlattenMovies-MF')(movie_embedding_mf)\nmovie_vec_mf = keras.layers.Dropout(0.2)(movie_vec_mf)\n\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec_mlp = keras.layers.Flatten(name='FlattenUsers-MLP')(keras.layers.Embedding(n_users + 1, n_latent_factors_user,name='User-Embedding-MLP')(user_input))\nuser_vec_mlp = keras.layers.Dropout(0.2)(user_vec_mlp)\n\nuser_vec_mf = keras.layers.Flatten(name='FlattenUsers-MF')(keras.layers.Embedding(n_users + 1, n_latent_factors_mf,name='User-Embedding-MF')(user_input))\nuser_vec_mf = keras.layers.Dropout(0.2)(user_vec_mf)\n\n\nconcat = keras.layers.merge([movie_vec_mlp, user_vec_mlp], mode='concat',name='Concat')\nconcat_dropout = keras.layers.Dropout(0.2)(concat)\ndense = keras.layers.Dense(200,name='FullyConnected')(concat_dropout)\ndense_batch = keras.layers.BatchNormalization(name='Batch')(dense)\ndropout_1 = keras.layers.Dropout(0.2,name='Dropout-1')(dense_batch)\ndense_2 = keras.layers.Dense(100,name='FullyConnected-1')(dropout_1)\ndense_batch_2 = keras.layers.BatchNormalization(name='Batch-2')(dense_2)\n\n\ndropout_2 = keras.layers.Dropout(0.2,name='Dropout-2')(dense_batch_2)\ndense_3 = keras.layers.Dense(50,name='FullyConnected-2')(dropout_2)\ndense_4 = keras.layers.Dense(20,name='FullyConnected-3', activation='relu')(dense_3)\n\npred_mf = keras.layers.merge([movie_vec_mf, user_vec_mf], mode='dot',name='Dot')\n\n\npred_mlp = keras.layers.Dense(1, activation='relu',name='Activation')(dense_4)\n\ncombine_mlp_mf = keras.layers.merge([pred_mf, pred_mlp], mode='concat',name='Concat-MF-MLP')\nresult_combine = keras.layers.Dense(100,name='Combine-MF-MLP')(combine_mlp_mf)\ndeep_combine = keras.layers.Dense(100,name='FullyConnected-4')(result_combine)\n\n\nresult = keras.layers.Dense(1,name='Prediction')(deep_combine)\n\n\nmodel = keras.Model([user_input, movie_input], result)\nopt = keras.optimizers.Adam(lr =0.01)\nmodel.compile(optimizer='adam',loss= 'mean_absolute_error')\n\nUsing TensorFlow backend.\n\n\nLet’s now see how our model looks like:\n\nfrom IPython.display import SVG\nfrom keras.utils.vis_utils import model_to_dot\nSVG(model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))\n\n\n\n\n\n\n\n\nSo, it wasn’t very complicated to set up. Courtesy Keras, we can do even more complex stuff!\n\nmodel.summary()\n\n__________________________________________________________________________________________________\nLayer (type) Output Shape Param # Connected to \n==================================================================================================\nItem (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nUser (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nMovie-Embedding-MLP (Embedding) (None, 1, 10) 16830 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding-MLP (Embedding) (None, 1, 8) 7552 User[0][0] \n__________________________________________________________________________________________________\nFlattenMovies-MLP (Flatten) (None, 10) 0 Movie-Embedding-MLP[0][0] \n__________________________________________________________________________________________________\nFlattenUsers-MLP (Flatten) (None, 8) 0 User-Embedding-MLP[0][0] \n__________________________________________________________________________________________________\ndropout_1 (Dropout) (None, 10) 0 FlattenMovies-MLP[0][0] \n__________________________________________________________________________________________________\ndropout_3 (Dropout) (None, 8) 0 FlattenUsers-MLP[0][0] \n__________________________________________________________________________________________________\nConcat (Merge) (None, 18) 0 dropout_1[0][0] \n dropout_3[0][0] \n__________________________________________________________________________________________________\ndropout_5 (Dropout) (None, 18) 0 Concat[0][0] \n__________________________________________________________________________________________________\nFullyConnected (Dense) (None, 200) 3800 dropout_5[0][0] \n__________________________________________________________________________________________________\nBatch (BatchNormalization) (None, 200) 800 FullyConnected[0][0] \n__________________________________________________________________________________________________\nDropout-1 (Dropout) (None, 200) 0 Batch[0][0] \n__________________________________________________________________________________________________\nFullyConnected-1 (Dense) (None, 100) 20100 Dropout-1[0][0] \n__________________________________________________________________________________________________\nBatch-2 (BatchNormalization) (None, 100) 400 FullyConnected-1[0][0] \n__________________________________________________________________________________________________\nMovie-Embedding-MF (Embedding) (None, 1, 3) 5049 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding-MF (Embedding) (None, 1, 3) 2832 User[0][0] \n__________________________________________________________________________________________________\nDropout-2 (Dropout) (None, 100) 0 Batch-2[0][0] \n__________________________________________________________________________________________________\nFlattenMovies-MF (Flatten) (None, 3) 0 Movie-Embedding-MF[0][0] \n__________________________________________________________________________________________________\nFlattenUsers-MF (Flatten) (None, 3) 0 User-Embedding-MF[0][0] \n__________________________________________________________________________________________________\nFullyConnected-2 (Dense) (None, 50) 5050 Dropout-2[0][0] \n__________________________________________________________________________________________________\ndropout_2 (Dropout) (None, 3) 0 FlattenMovies-MF[0][0] \n__________________________________________________________________________________________________\ndropout_4 (Dropout) (None, 3) 0 FlattenUsers-MF[0][0] \n__________________________________________________________________________________________________\nFullyConnected-3 (Dense) (None, 20) 1020 FullyConnected-2[0][0] \n__________________________________________________________________________________________________\nDot (Merge) (None, 1) 0 dropout_2[0][0] \n dropout_4[0][0] \n__________________________________________________________________________________________________\nActivation (Dense) (None, 1) 21 FullyConnected-3[0][0] \n__________________________________________________________________________________________________\nConcat-MF-MLP (Merge) (None, 2) 0 Dot[0][0] \n Activation[0][0] \n__________________________________________________________________________________________________\nCombine-MF-MLP (Dense) (None, 100) 300 Concat-MF-MLP[0][0] \n__________________________________________________________________________________________________\nFullyConnected-4 (Dense) (None, 100) 10100 Combine-MF-MLP[0][0] \n__________________________________________________________________________________________________\nPrediction (Dense) (None, 1) 101 FullyConnected-4[0][0] \n==================================================================================================\nTotal params: 73,955\nTrainable params: 73,355\nNon-trainable params: 600\n__________________________________________________________________________________________________\n\n\nWe can see that the number of parameters is more than what we had in the Matrix Factorisation case. Let’s see how this model works. I’ll run it for more epochs given that we have more parameters.\n\nhistory = model.fit([train.user_id, train.item_id], train.rating, epochs=25, verbose=0, validation_split=0.1)\n\n\n\nPrediction performance of Neural Network based recommender system\n\nfrom sklearn.metrics import mean_absolute_error\ny_hat_2 = np.round(model.predict([test.user_id, test.item_id]),0)\nprint(mean_absolute_error(y_true, y_hat_2))\n\nprint(mean_absolute_error(y_true, model.predict([test.user_id, test.item_id])))\n\n\n0.716\n0.737380115688\n\n\nPretty similar to the result we got using matrix factorisation. This isn’t very optimised, and I am sure doing so, we can make this approach perform much better than GMF!\nThanks for reading. This post has been a good learning experience for me. Hope you enjoyed too!" + "text": "Recently, I had a chance to read an interesting WWW 2017 paper entitled: Neural Collaborative Filtering. The first paragraph of the abstract reads as follows:\n\nIn recent years, deep neural networks have yielded immense success on speech recognition, computer vision and natural language processing. However, the exploration of deep neural networks on recommender systems has received relatively less scrutiny. In this work, we strive to develop techniques based on neural networks to tackle the key problem in recommendation — collaborative filtering — on the basis of implicit feedback.\n\nI’d recently written a blog post on using Keras (deep learning library) for implementing traditional matrix factorization based collaborative filtering. So, I thought to get my hands dirty with building a prototype for the paper mentioned above. The authors have already provided their code on Github, which should serve as a reference for the paper and not my post, whose purpose is merely educational!\nHere’s how the proposed network architecture looks in the paper:\n\nThere are a few terms that we need to understand:\n\nUser (u) and Item (i) are used to create embeddings (low-dimensional) for user and item\nGeneralized Matrix Factorisation (GMF) combines the two embeddings using the dot product. This is our regular matrix factorisation.\nMulti-layer perceptron can also create embeddings for user and items. However, instead of taking a dot product of these to obtain the rating, we can concatenate them to create a feature vector which can be passed on to the further layers.\nNeural MF can then combine the predictions from MLP and GMF to obtain the following prediction.\n\nAs done in my previous post, I’ll use the MovieLens-100k dataset for illustration. Please refer to my previous post for more details.\n\nPeak into the dataset\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport warnings\nwarnings.filterwarnings('ignore')\n\n%matplotlib inline\n\n\ndataset = pd.read_csv(\"/Users/nipun/Downloads/ml-100k/u.data\",sep='\\t',names=\"user_id,item_id,rating,timestamp\".split(\",\"))\n\n\ndataset.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n196\n242\n3\n881250949\n\n\n1\n186\n302\n3\n891717742\n\n\n2\n22\n377\n1\n878887116\n\n\n3\n244\n51\n2\n880606923\n\n\n4\n166\n346\n1\n886397596\n\n\n\n\n\n\n\nSo, each record (row) shows the rating for a user, item (movie) pair. It should be noted that I use item and movie interchangeably in this post.\n\nlen(dataset.user_id.unique()), len(dataset.item_id.unique())\n\n(943, 1682)\n\n\nWe assign a unique number between (0, #users) to each user and do the same for movies.\n\ndataset.user_id = dataset.user_id.astype('category').cat.codes.values\ndataset.item_id = dataset.item_id.astype('category').cat.codes.values\n\n\ndataset.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n195\n241\n3\n881250949\n\n\n1\n185\n301\n3\n891717742\n\n\n2\n21\n376\n1\n878887116\n\n\n3\n243\n50\n2\n880606923\n\n\n4\n165\n345\n1\n886397596\n\n\n\n\n\n\n\n\n\nTrain test split\nWe’ll now split our dataset of 100k ratings into train (containing 80k ratings) and test (containing 20k ratings). Given the train set, we’d like to accurately estimate the ratings in the test set.\n\nfrom sklearn.model_selection import train_test_split\ntrain, test = train_test_split(dataset, test_size=0.2)\n\n\ntrain.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n13185\n71\n95\n5\n880037203\n\n\n23391\n144\n509\n4\n882181859\n\n\n90744\n895\n50\n2\n887159951\n\n\n3043\n255\n279\n5\n882151167\n\n\n8932\n55\n94\n4\n892683274\n\n\n\n\n\n\n\n\ntest.head()\ny_true = test.rating\n\n\n\nCreating the model\n\nimport keras\nn_latent_factors_user = 8\nn_latent_factors_movie = 10\nn_latent_factors_mf = 3\nn_users, n_movies = len(dataset.user_id.unique()), len(dataset.item_id.unique())\n\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding_mlp = keras.layers.Embedding(n_movies + 1, n_latent_factors_movie, name='Movie-Embedding-MLP')(movie_input)\nmovie_vec_mlp = keras.layers.Flatten(name='FlattenMovies-MLP')(movie_embedding_mlp)\nmovie_vec_mlp = keras.layers.Dropout(0.2)(movie_vec_mlp)\n\nmovie_embedding_mf = keras.layers.Embedding(n_movies + 1, n_latent_factors_mf, name='Movie-Embedding-MF')(movie_input)\nmovie_vec_mf = keras.layers.Flatten(name='FlattenMovies-MF')(movie_embedding_mf)\nmovie_vec_mf = keras.layers.Dropout(0.2)(movie_vec_mf)\n\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec_mlp = keras.layers.Flatten(name='FlattenUsers-MLP')(keras.layers.Embedding(n_users + 1, n_latent_factors_user,name='User-Embedding-MLP')(user_input))\nuser_vec_mlp = keras.layers.Dropout(0.2)(user_vec_mlp)\n\nuser_vec_mf = keras.layers.Flatten(name='FlattenUsers-MF')(keras.layers.Embedding(n_users + 1, n_latent_factors_mf,name='User-Embedding-MF')(user_input))\nuser_vec_mf = keras.layers.Dropout(0.2)(user_vec_mf)\n\n\nconcat = keras.layers.merge([movie_vec_mlp, user_vec_mlp], mode='concat',name='Concat')\nconcat_dropout = keras.layers.Dropout(0.2)(concat)\ndense = keras.layers.Dense(200,name='FullyConnected')(concat_dropout)\ndense_batch = keras.layers.BatchNormalization(name='Batch')(dense)\ndropout_1 = keras.layers.Dropout(0.2,name='Dropout-1')(dense_batch)\ndense_2 = keras.layers.Dense(100,name='FullyConnected-1')(dropout_1)\ndense_batch_2 = keras.layers.BatchNormalization(name='Batch-2')(dense_2)\n\n\ndropout_2 = keras.layers.Dropout(0.2,name='Dropout-2')(dense_batch_2)\ndense_3 = keras.layers.Dense(50,name='FullyConnected-2')(dropout_2)\ndense_4 = keras.layers.Dense(20,name='FullyConnected-3', activation='relu')(dense_3)\n\npred_mf = keras.layers.merge([movie_vec_mf, user_vec_mf], mode='dot',name='Dot')\n\n\npred_mlp = keras.layers.Dense(1, activation='relu',name='Activation')(dense_4)\n\ncombine_mlp_mf = keras.layers.merge([pred_mf, pred_mlp], mode='concat',name='Concat-MF-MLP')\nresult_combine = keras.layers.Dense(100,name='Combine-MF-MLP')(combine_mlp_mf)\ndeep_combine = keras.layers.Dense(100,name='FullyConnected-4')(result_combine)\n\n\nresult = keras.layers.Dense(1,name='Prediction')(deep_combine)\n\n\nmodel = keras.Model([user_input, movie_input], result)\nopt = keras.optimizers.Adam(lr =0.01)\nmodel.compile(optimizer='adam',loss= 'mean_absolute_error')\n\nUsing TensorFlow backend.\n\n\nLet’s now see how our model looks like:\n\nfrom IPython.display import SVG\nfrom keras.utils.vis_utils import model_to_dot\nSVG(model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))\n\n\n\n\n\n\n\n\nSo, it wasn’t very complicated to set up. Courtesy Keras, we can do even more complex stuff!\n\nmodel.summary()\n\n__________________________________________________________________________________________________\nLayer (type) Output Shape Param # Connected to \n==================================================================================================\nItem (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nUser (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nMovie-Embedding-MLP (Embedding) (None, 1, 10) 16830 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding-MLP (Embedding) (None, 1, 8) 7552 User[0][0] \n__________________________________________________________________________________________________\nFlattenMovies-MLP (Flatten) (None, 10) 0 Movie-Embedding-MLP[0][0] \n__________________________________________________________________________________________________\nFlattenUsers-MLP (Flatten) (None, 8) 0 User-Embedding-MLP[0][0] \n__________________________________________________________________________________________________\ndropout_1 (Dropout) (None, 10) 0 FlattenMovies-MLP[0][0] \n__________________________________________________________________________________________________\ndropout_3 (Dropout) (None, 8) 0 FlattenUsers-MLP[0][0] \n__________________________________________________________________________________________________\nConcat (Merge) (None, 18) 0 dropout_1[0][0] \n dropout_3[0][0] \n__________________________________________________________________________________________________\ndropout_5 (Dropout) (None, 18) 0 Concat[0][0] \n__________________________________________________________________________________________________\nFullyConnected (Dense) (None, 200) 3800 dropout_5[0][0] \n__________________________________________________________________________________________________\nBatch (BatchNormalization) (None, 200) 800 FullyConnected[0][0] \n__________________________________________________________________________________________________\nDropout-1 (Dropout) (None, 200) 0 Batch[0][0] \n__________________________________________________________________________________________________\nFullyConnected-1 (Dense) (None, 100) 20100 Dropout-1[0][0] \n__________________________________________________________________________________________________\nBatch-2 (BatchNormalization) (None, 100) 400 FullyConnected-1[0][0] \n__________________________________________________________________________________________________\nMovie-Embedding-MF (Embedding) (None, 1, 3) 5049 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding-MF (Embedding) (None, 1, 3) 2832 User[0][0] \n__________________________________________________________________________________________________\nDropout-2 (Dropout) (None, 100) 0 Batch-2[0][0] \n__________________________________________________________________________________________________\nFlattenMovies-MF (Flatten) (None, 3) 0 Movie-Embedding-MF[0][0] \n__________________________________________________________________________________________________\nFlattenUsers-MF (Flatten) (None, 3) 0 User-Embedding-MF[0][0] \n__________________________________________________________________________________________________\nFullyConnected-2 (Dense) (None, 50) 5050 Dropout-2[0][0] \n__________________________________________________________________________________________________\ndropout_2 (Dropout) (None, 3) 0 FlattenMovies-MF[0][0] \n__________________________________________________________________________________________________\ndropout_4 (Dropout) (None, 3) 0 FlattenUsers-MF[0][0] \n__________________________________________________________________________________________________\nFullyConnected-3 (Dense) (None, 20) 1020 FullyConnected-2[0][0] \n__________________________________________________________________________________________________\nDot (Merge) (None, 1) 0 dropout_2[0][0] \n dropout_4[0][0] \n__________________________________________________________________________________________________\nActivation (Dense) (None, 1) 21 FullyConnected-3[0][0] \n__________________________________________________________________________________________________\nConcat-MF-MLP (Merge) (None, 2) 0 Dot[0][0] \n Activation[0][0] \n__________________________________________________________________________________________________\nCombine-MF-MLP (Dense) (None, 100) 300 Concat-MF-MLP[0][0] \n__________________________________________________________________________________________________\nFullyConnected-4 (Dense) (None, 100) 10100 Combine-MF-MLP[0][0] \n__________________________________________________________________________________________________\nPrediction (Dense) (None, 1) 101 FullyConnected-4[0][0] \n==================================================================================================\nTotal params: 73,955\nTrainable params: 73,355\nNon-trainable params: 600\n__________________________________________________________________________________________________\n\n\nWe can see that the number of parameters is more than what we had in the Matrix Factorisation case. Let’s see how this model works. I’ll run it for more epochs given that we have more parameters.\n\nhistory = model.fit([train.user_id, train.item_id], train.rating, epochs=25, verbose=0, validation_split=0.1)\n\n\n\nPrediction performance of Neural Network based recommender system\n\nfrom sklearn.metrics import mean_absolute_error\ny_hat_2 = np.round(model.predict([test.user_id, test.item_id]),0)\nprint(mean_absolute_error(y_true, y_hat_2))\n\nprint(mean_absolute_error(y_true, model.predict([test.user_id, test.item_id])))\n\n\n0.716\n0.737380115688\n\n\nPretty similar to the result we got using matrix factorisation. This isn’t very optimised, and I am sure doing so, we can make this approach perform much better than GMF!\nThanks for reading. This post has been a good learning experience for me. Hope you enjoyed too!" }, { "objectID": "posts/2013-05-01-aggregation-timeseries.html", "href": "posts/2013-05-01-aggregation-timeseries.html", "title": "Aggregation in Timeseries using Pandas", "section": "", - "text": "We’ve all grown up studying groupy by and aggregations in SQL. Pandas provides excellent functionality for group by and aggregations. However, for time series data, we need a bit of manipulation. In this post, I’ll take a small example of weather time series data.\n\nimport pandas as pd\nimport matplotlib.pyplot as plt\nplt.style.use('ggplot')\n%matplotlib inline\n\n\ndf = pd.read_csv(\"weather.csv\", index_col=0, parse_dates=True).tz_localize(\"UTC\").tz_convert(\"US/Central\")\n\n\ndf.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n\n\n\n\n\n\n\n\n\nQuestion 1: What is the mean temperature and humidity per hour of the day?\nWe’ll create a new column in the df containing the hour information from the index.\n\ndf[\"hour\"] = df.index.hour\n\n\ndf.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\nhour\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n0\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n1\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n2\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n3\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n4\n\n\n\n\n\n\n\n\n\nmean_temp_humidity = df.groupby(\"hour\").mean()\nmean_temp_humidity.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\nhour\n\n\n\n\n\n\n0\n0.779322\n45.976441\n\n\n1\n0.803898\n44.859492\n\n\n2\n0.812203\n44.244407\n\n\n3\n0.819153\n43.724068\n\n\n4\n0.832712\n43.105763\n\n\n\n\n\n\n\n\n\nmean_temp_humidity.plot(subplots=True);\n\n\n\n\n\n\n\n\nWe can use pivoting to achieve the same dataframe.\n\nmean_temp_humidity_pivoting = pd.pivot_table(df, index=[\"hour\"], values=[\"temperature\", \"humidity\"])\n\n\nmean_temp_humidity_pivoting.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\nhour\n\n\n\n\n\n\n0\n0.779322\n45.976441\n\n\n1\n0.803898\n44.859492\n\n\n2\n0.812203\n44.244407\n\n\n3\n0.819153\n43.724068\n\n\n4\n0.832712\n43.105763\n\n\n\n\n\n\n\n\nBy default the aggregation function used in pivoting is mean.\n\n\nQuestion 2: Can we plot the daily variation in temperature per hour of the day?\nFor this, we want to have a dataframe with hour of day as the index and the different days as the different columns.\n\ndf[\"day\"] = df.index.dayofyear\n\n\ndf.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\nhour\nday\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n0\n1\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n1\n1\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n2\n1\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n3\n1\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n4\n1\n\n\n\n\n\n\n\n\n\ndaily_temp = pd.pivot_table(df, index=[\"hour\"], columns=[\"day\"], values=[\"temperature\"])\n\n\ndaily_temp.head()\n\n\n\n\n\n\n\n\ntemperature\n\n\nday\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n...\n50\n51\n52\n53\n54\n55\n56\n57\n58\n59\n\n\nhour\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n0\n38.74\n39.94\n39.57\n41.83\n33.95\n36.98\n46.93\n29.95\n36.57\n36.19\n...\n46.17\n54.01\n66.57\n55.49\n37.68\n30.34\n34.97\n39.93\n36.19\n32.25\n\n\n1\n38.56\n39.76\n39.75\n40.85\n32.29\n35.89\n45.33\n28.55\n37.31\n36.40\n...\n41.38\n54.56\n66.57\n55.49\n36.76\n30.04\n34.97\n36.37\n36.38\n32.25\n\n\n2\n38.56\n39.58\n39.94\n39.73\n31.59\n36.44\n44.51\n27.44\n37.78\n36.59\n...\n39.99\n55.81\n66.57\n55.34\n35.56\n30.57\n34.75\n34.74\n36.20\n32.25\n\n\n3\n37.97\n38.83\n40.16\n38.78\n30.48\n36.85\n43.92\n25.97\n37.97\n36.38\n...\n39.05\n57.14\n66.38\n55.27\n34.94\n30.59\n35.15\n34.31\n36.20\n32.52\n\n\n4\n37.78\n39.02\n40.65\n39.74\n29.89\n35.72\n44.37\n24.74\n37.82\n35.49\n...\n37.99\n57.51\n66.57\n55.49\n34.04\n30.38\n35.15\n33.02\n34.49\n32.52\n\n\n\n\n5 rows × 59 columns\n\n\n\n\n\ndaily_temp.plot(style='k-', alpha=0.3, legend=False)\nplt.ylabel(\"Temp\");\n\n\n\n\n\n\n\n\nSo, we can see some pattern up there! Around 15 hours, the temperature usually peaks.\nThere you go! Some recipes for aggregation and plotting of time series data." + "text": "We’ve all grown up studying groupy by and aggregations in SQL. Pandas provides excellent functionality for group by and aggregations. However, for time series data, we need a bit of manipulation. In this post, I’ll take a small example of weather time series data.\n\nimport pandas as pd\nimport matplotlib.pyplot as plt\nplt.style.use('ggplot')\n%matplotlib inline\n\n\ndf = pd.read_csv(\"weather.csv\", index_col=0, parse_dates=True).tz_localize(\"UTC\").tz_convert(\"US/Central\")\n\n\ndf.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n\n\n\n\n\n\n\n\nQuestion 1: What is the mean temperature and humidity per hour of the day?\nWe’ll create a new column in the df containing the hour information from the index.\n\ndf[\"hour\"] = df.index.hour\n\n\ndf.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\nhour\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n0\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n1\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n2\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n3\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n4\n\n\n\n\n\n\n\n\nmean_temp_humidity = df.groupby(\"hour\").mean()\nmean_temp_humidity.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\nhour\n\n\n\n\n\n\n0\n0.779322\n45.976441\n\n\n1\n0.803898\n44.859492\n\n\n2\n0.812203\n44.244407\n\n\n3\n0.819153\n43.724068\n\n\n4\n0.832712\n43.105763\n\n\n\n\n\n\n\n\nmean_temp_humidity.plot(subplots=True);\n\n\n\n\n\n\n\n\nWe can use pivoting to achieve the same dataframe.\n\nmean_temp_humidity_pivoting = pd.pivot_table(df, index=[\"hour\"], values=[\"temperature\", \"humidity\"])\n\n\nmean_temp_humidity_pivoting.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\nhour\n\n\n\n\n\n\n0\n0.779322\n45.976441\n\n\n1\n0.803898\n44.859492\n\n\n2\n0.812203\n44.244407\n\n\n3\n0.819153\n43.724068\n\n\n4\n0.832712\n43.105763\n\n\n\n\n\n\n\nBy default the aggregation function used in pivoting is mean.\n\n\nQuestion 2: Can we plot the daily variation in temperature per hour of the day?\nFor this, we want to have a dataframe with hour of day as the index and the different days as the different columns.\n\ndf[\"day\"] = df.index.dayofyear\n\n\ndf.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\nhour\nday\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n0\n1\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n1\n1\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n2\n1\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n3\n1\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n4\n1\n\n\n\n\n\n\n\n\ndaily_temp = pd.pivot_table(df, index=[\"hour\"], columns=[\"day\"], values=[\"temperature\"])\n\n\ndaily_temp.head()\n\n\n\n\n\n\n\ntemperature\n\n\nday\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n...\n50\n51\n52\n53\n54\n55\n56\n57\n58\n59\n\n\nhour\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n0\n38.74\n39.94\n39.57\n41.83\n33.95\n36.98\n46.93\n29.95\n36.57\n36.19\n...\n46.17\n54.01\n66.57\n55.49\n37.68\n30.34\n34.97\n39.93\n36.19\n32.25\n\n\n1\n38.56\n39.76\n39.75\n40.85\n32.29\n35.89\n45.33\n28.55\n37.31\n36.40\n...\n41.38\n54.56\n66.57\n55.49\n36.76\n30.04\n34.97\n36.37\n36.38\n32.25\n\n\n2\n38.56\n39.58\n39.94\n39.73\n31.59\n36.44\n44.51\n27.44\n37.78\n36.59\n...\n39.99\n55.81\n66.57\n55.34\n35.56\n30.57\n34.75\n34.74\n36.20\n32.25\n\n\n3\n37.97\n38.83\n40.16\n38.78\n30.48\n36.85\n43.92\n25.97\n37.97\n36.38\n...\n39.05\n57.14\n66.38\n55.27\n34.94\n30.59\n35.15\n34.31\n36.20\n32.52\n\n\n4\n37.78\n39.02\n40.65\n39.74\n29.89\n35.72\n44.37\n24.74\n37.82\n35.49\n...\n37.99\n57.51\n66.57\n55.49\n34.04\n30.38\n35.15\n33.02\n34.49\n32.52\n\n\n\n\n5 rows × 59 columns\n\n\n\n\ndaily_temp.plot(style='k-', alpha=0.3, legend=False)\nplt.ylabel(\"Temp\");\n\n\n\n\n\n\n\n\nSo, we can see some pattern up there! Around 15 hours, the temperature usually peaks.\nThere you go! Some recipes for aggregation and plotting of time series data." }, { "objectID": "posts/2017-12-18-recommend-keras.html", "href": "posts/2017-12-18-recommend-keras.html", "title": "Recommender Systems in Keras", "section": "", - "text": "I have written a few posts earlier about matrix factorisation using various Python libraries. The main application I had in mind for matrix factorisation was recommender systems. In this post, I’ll write about using Keras for creating recommender systems. Various people have written excellent similar posts and code that I draw a lot of inspiration from, and give them their credit! I’m assuming that a reader has some experience with Keras, as this post is not intended to be an introduction to Keras.\nSpecifically, in this post, I’ll talk about:\n\nMatrix Factorisation in Keras\nAdding non-negativitiy constraints to solve non-negative matrix factorisation (NNMF)\nUsing neural networks for recommendations\n\nI’ll be using the Movielens-100k dataset for illustration. There are 943 users and 1682 movies. In total there are a 100k ratings in the dataset. It should be noted that the max. total number of rating for the <users, movies> would be 943*1682, which means that we have about 7% of the total ratings! All rating are on a scale of 1-5.\n\nTask\nGiven this set of ratings, can we recommend the next set of movies to a user? This would translate to: for every user, estimating the ratings for all the movies that (s)he hasn’t watched and maybe recommend the top-k movies by the esimtated ratings!\n\n\nPeak into the dataset\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport warnings\nwarnings.filterwarnings('ignore')\n\n%matplotlib inline\n\n\ndataset = pd.read_csv(\"/Users/nipun/Downloads/ml-100k/u.data\",sep='\\t',names=\"user_id,item_id,rating,timestamp\".split(\",\"))\n\n\ndataset.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n196\n242\n3\n881250949\n\n\n1\n186\n302\n3\n891717742\n\n\n2\n22\n377\n1\n878887116\n\n\n3\n244\n51\n2\n880606923\n\n\n4\n166\n346\n1\n886397596\n\n\n\n\n\n\n\n\nSo, each record (row) shows the rating for a user, item (movie) pair. It should be noted that I use item and movie interchangeably in this post.\n\nlen(dataset.user_id.unique()), len(dataset.item_id.unique())\n\n(943, 1682)\n\n\nWe assign a unique number between (0, #users) to each user and do the same for movies.\n\ndataset.user_id = dataset.user_id.astype('category').cat.codes.values\ndataset.item_id = dataset.item_id.astype('category').cat.codes.values\n\n\ndataset.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n195\n241\n3\n881250949\n\n\n1\n185\n301\n3\n891717742\n\n\n2\n21\n376\n1\n878887116\n\n\n3\n243\n50\n2\n880606923\n\n\n4\n165\n345\n1\n886397596\n\n\n\n\n\n\n\n\n\n\nTrain test split\nWe’ll now split our dataset of 100k ratings into train (containing 80k ratings) and test (containing 20k ratings). Given the train set, we’d like to accurately estimate the ratings in the test set.\n\nfrom sklearn.model_selection import train_test_split\ntrain, test = train_test_split(dataset, test_size=0.2)\n\n\ntrain.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n90092\n832\n12\n2\n875036139\n\n\n50879\n94\n132\n3\n888954341\n\n\n67994\n436\n12\n4\n880141129\n\n\n49769\n710\n344\n4\n884485683\n\n\n11032\n121\n736\n4\n879270874\n\n\n\n\n\n\n\n\n\ntest.head()\n\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n89284\n907\n493\n3\n879723046\n\n\n60499\n550\n25\n4\n892785056\n\n\n11090\n373\n222\n5\n880394520\n\n\n36096\n199\n140\n4\n884129346\n\n\n21633\n71\n317\n5\n880037702\n\n\n\n\n\n\n\n\n\n\nMatrix factorisation\nOne popular recommender systems approach is called Matrix Factorisation. It works on the principle that we can learn a low-dimensional representation (embedding) of user and movie. For example, for each movie, we can have how much action it has, how long it is, and so on. For each user, we can encode how much they like action, or how much they like long movies, etc. Thus, we can combine the user and the movie embeddings to estimate the ratings on unseen movies. This approach can also be viewed as: given a matrix (A [M X N]) containing users and movies, we want to estimate low dimensional matrices (W [M X k] and H [M X k]), such that: \\(A \\approx W.H^T\\)\n\n\nMatrix factorisation in Keras\nWe’ll now write some code to solve the recommendation problem by matrix factorisation in Keras. We’re trying to learn two low-dimensional embeddings of users and items.\n\nimport keras\nfrom IPython.display import SVG\nfrom keras.optimizers import Adam\nfrom keras.utils.vis_utils import model_to_dot\nn_users, n_movies = len(dataset.user_id.unique()), len(dataset.item_id.unique())\nn_latent_factors = 3\n\nUsing TensorFlow backend.\n\n\nThe key thing is to learn an embedding for movies and users, and then combine them using the dot product! For estimating the rating, for each user, movie pair of interest, we’d take the dot product of the respective user and item embedding. As an example, if we have 2 dimensions in our user and item embedding, which say correspond to [how much user likes action, how much user likes long movies], and the item embedding is [how much action is in the movie, how long is the movie]. Then, we can predict for a user u, and movie m as how much u likes action \\(\\times\\) how much action is there in m \\(+\\) how much u likes long movies \\(\\times\\) how long is m.\nOur model would optimise the emebedding such that we minimise the mean squared error on the ratings from the train set.\n\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors, name='Movie-Embedding')(movie_input)\nmovie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors,name='User-Embedding')(user_input))\n\nprod = keras.layers.merge([movie_vec, user_vec], mode='dot',name='DotProduct')\nmodel = keras.Model([user_input, movie_input], prod)\nmodel.compile('adam', 'mean_squared_error')\n\nHere’s a visualisation of our model for a better understanding.\n\nSVG(model_to_dot(model, show_shapes=True, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))\n\n\n\n\n\n\n\n\nWe can see that in the Merge layer, we take the dot product of the user and the item embeddings to obtain the rating.\nWe can also summarise our model as follows:\n\nmodel.summary()\n\n__________________________________________________________________________________________________\nLayer (type) Output Shape Param # Connected to \n==================================================================================================\nItem (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nUser (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nMovie-Embedding (Embedding) (None, 1, 3) 5049 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding (Embedding) (None, 1, 3) 2832 User[0][0] \n__________________________________________________________________________________________________\nFlattenMovies (Flatten) (None, 3) 0 Movie-Embedding[0][0] \n__________________________________________________________________________________________________\nFlattenUsers (Flatten) (None, 3) 0 User-Embedding[0][0] \n__________________________________________________________________________________________________\nDotProduct (Merge) (None, 1) 0 FlattenMovies[0][0] \n FlattenUsers[0][0] \n==================================================================================================\nTotal params: 7,881\nTrainable params: 7,881\nNon-trainable params: 0\n__________________________________________________________________________________________________\n\n\nSo, we have 7881 parameters to learn! Let’s train our model now!\n\nhistory = model.fit([train.user_id, train.item_id], train.rating, epochs=100, verbose=0)\n\n\nTrain error v/s epoch number\nBefore we test how well our model does in the test setting, we can visualise the train loss with epoch number.\n\npd.Series(history.history['loss']).plot(logy=True)\nplt.xlabel(\"Epoch\")\nplt.ylabel(\"Train Error\")\n\n\n\n\n\n\n\n\n\n\nPrediction error\nLet’s now see how our model does! I’ll do a small post-processing step to round off our prediction to the nearest integer. This is usually not done, and thus just a whimsical step, since the training ratings are all integers! There are better ways to encode this intger requirement (one-hot encoding!), but we won’t discuss them in this post.\n\ny_hat = np.round(model.predict([test.user_id, test.item_id]),0)\ny_true = test.rating\n\n\nfrom sklearn.metrics import mean_absolute_error\nmean_absolute_error(y_true, y_hat)\n\n0.6915\n\n\nNot bad! We’re able to get a \\(MAE\\) of 0.69! I’m sure with a bit of parameter/hyper-parameter optimisation, we may be able to improve the results. However, I won’t talk about these optimisations in this post.\n\n\nExtracting the learnt embeddings\nWe can extract the learnt movie and item embeddings as follows:\n\nmovie_embedding_learnt = model.get_layer(name='Movie-Embedding').get_weights()[0]\npd.DataFrame(movie_embedding_learnt).describe()\n\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\ncount\n1683.000000\n1683.000000\n1683.000000\n\n\nmean\n-0.935420\n0.857862\n0.954169\n\n\nstd\n0.517458\n0.447439\n0.458095\n\n\nmin\n-2.524487\n-0.459752\n-0.989537\n\n\n25%\n-1.323431\n0.546364\n0.642444\n\n\n50%\n-0.949188\n0.851243\n0.993619\n\n\n75%\n-0.550862\n1.159588\n1.283555\n\n\nmax\n0.500618\n2.140607\n2.683658\n\n\n\n\n\n\n\n\n\nuser_embedding_learnt = model.get_layer(name='User-Embedding').get_weights()[0]\npd.DataFrame(user_embedding_learnt).describe()\n\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\ncount\n944.000000\n944.000000\n944.000000\n\n\nmean\n-1.126231\n1.171609\n1.109131\n\n\nstd\n0.517478\n0.409016\n0.548384\n\n\nmin\n-2.883226\n-0.500010\n-0.415373\n\n\n25%\n-1.458197\n0.903574\n0.735729\n\n\n50%\n-1.159480\n1.199517\n1.084089\n\n\n75%\n-0.836746\n1.456610\n1.468611\n\n\nmax\n0.899436\n2.605330\n2.826109\n\n\n\n\n\n\n\n\nWe can see that both the user and the item embeddings have negative elements. There are some applications which require that the learnt embeddings be non-negative. This approach is also called non-negative matrix factorisation, which we’ll workout now.\n\n\n\nNon-negative Matrix factorisation (NNMF) in Keras\nThe code for NNMF remains exactly the same as the code for matrix factorisation. The only change is that we add non-negativity constraints on the learnt embeddings. This is done as follows:\n\nfrom keras.constraints import non_neg\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors, name='NonNegMovie-Embedding', embeddings_constraint=non_neg())(movie_input)\nmovie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors,name='NonNegUser-Embedding',embeddings_constraint=non_neg())(user_input))\n\nprod = keras.layers.merge([movie_vec, user_vec], mode='dot',name='DotProduct')\nmodel = keras.Model([user_input, movie_input], prod)\nmodel.compile('adam', 'mean_squared_error')\n\nWe now verify if we are indeed able to learn non-negative embeddings. I’ll not compare the performance of NNMF on the test set, in the interest of space.\n\nhistory_nonneg = model.fit([train.user_id, train.item_id], train.rating, epochs=10, verbose=0)\n\n\nmovie_embedding_learnt = model.get_layer(name='NonNegMovie-Embedding').get_weights()[0]\npd.DataFrame(movie_embedding_learnt).describe()\n\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\ncount\n1683.000000\n1683.000000\n1683.000000\n\n\nmean\n0.838450\n0.840330\n0.838066\n\n\nstd\n0.301618\n0.301529\n0.301040\n\n\nmin\n-0.000000\n-0.000000\n-0.000000\n\n\n25%\n0.657749\n0.663951\n0.656453\n\n\n50%\n0.901495\n0.904192\n0.895934\n\n\n75%\n1.072706\n1.073591\n1.072926\n\n\nmax\n1.365719\n1.379006\n1.373672\n\n\n\n\n\n\n\n\nLooks good!\n\n\nNeural networks for recommendation\nWe’ll now create a simple neural network for recommendation, or for estimating rating! This model is very similar to the earlier matrix factorisation models, but differs in the following ways:\n\nInstead of taking a dot product of the user and the item embedding, we concatenate them and use them as features for our neural network. Thus, we are not constrained to the dot product way of combining the embeddings, and can learn complex non-linear relationships.\nDue to #1, we can now have a different dimension of user and item embeddings. This can be useful if one dimension is larger than the other.\n\n\nn_latent_factors_user = 5\nn_latent_factors_movie = 8\n\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors_movie, name='Movie-Embedding')(movie_input)\nmovie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)\nmovie_vec = keras.layers.Dropout(0.2)(movie_vec)\n\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors_user,name='User-Embedding')(user_input))\nuser_vec = keras.layers.Dropout(0.2)(user_vec)\n\n\nconcat = keras.layers.merge([movie_vec, user_vec], mode='concat',name='Concat')\nconcat_dropout = keras.layers.Dropout(0.2)(concat)\ndense = keras.layers.Dense(200,name='FullyConnected')(concat)\ndropout_1 = keras.layers.Dropout(0.2,name='Dropout')(dense)\ndense_2 = keras.layers.Dense(100,name='FullyConnected-1')(concat)\ndropout_2 = keras.layers.Dropout(0.2,name='Dropout')(dense_2)\ndense_3 = keras.layers.Dense(50,name='FullyConnected-2')(dense_2)\ndropout_3 = keras.layers.Dropout(0.2,name='Dropout')(dense_3)\ndense_4 = keras.layers.Dense(20,name='FullyConnected-3', activation='relu')(dense_3)\n\n\nresult = keras.layers.Dense(1, activation='relu',name='Activation')(dense_4)\nadam = Adam(lr=0.005)\nmodel = keras.Model([user_input, movie_input], result)\nmodel.compile(optimizer=adam,loss= 'mean_absolute_error')\n\nLet’s now see how our model looks like:\n\nSVG(model_to_dot(model, show_shapes=True, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))\n\n\n\n\n\n\n\n\nIt should be noted that we use a different number of embeddings for user (3) and items (5)! These combine to form a vector of length (5+3 = 8), which is then fed into the neural network. We also add a dropout layer to prevent overfitting!\n\nmodel.summary()\n\n__________________________________________________________________________________________________\nLayer (type) Output Shape Param # Connected to \n==================================================================================================\nItem (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nUser (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nMovie-Embedding (Embedding) (None, 1, 8) 13464 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding (Embedding) (None, 1, 5) 4720 User[0][0] \n__________________________________________________________________________________________________\nFlattenMovies (Flatten) (None, 8) 0 Movie-Embedding[0][0] \n__________________________________________________________________________________________________\nFlattenUsers (Flatten) (None, 5) 0 User-Embedding[0][0] \n__________________________________________________________________________________________________\ndropout_1 (Dropout) (None, 8) 0 FlattenMovies[0][0] \n__________________________________________________________________________________________________\ndropout_2 (Dropout) (None, 5) 0 FlattenUsers[0][0] \n__________________________________________________________________________________________________\nConcat (Merge) (None, 13) 0 dropout_1[0][0] \n dropout_2[0][0] \n__________________________________________________________________________________________________\nFullyConnected-1 (Dense) (None, 100) 1400 Concat[0][0] \n__________________________________________________________________________________________________\nFullyConnected-2 (Dense) (None, 50) 5050 FullyConnected-1[0][0] \n__________________________________________________________________________________________________\nFullyConnected-3 (Dense) (None, 20) 1020 FullyConnected-2[0][0] \n__________________________________________________________________________________________________\nActivation (Dense) (None, 1) 21 FullyConnected-3[0][0] \n==================================================================================================\nTotal params: 25,675\nTrainable params: 25,675\nNon-trainable params: 0\n__________________________________________________________________________________________________\n\n\nWe can see that the number of parameters is more than what we had in the Matrix Factorisation case. Let’s see how this model works. I’ll run it for more epochs given that we have more parameters.\n\nhistory = model.fit([train.user_id, train.item_id], train.rating, epochs=250, verbose=0)\n\n\nPrediction performance of Neural Network based recommender system\n\ny_hat_2 = np.round(model.predict([test.user_id, test.item_id]),0)\nprint(mean_absolute_error(y_true, y_hat_2))\n\nprint(mean_absolute_error(y_true, model.predict([test.user_id, test.item_id])))\n\n\n0.6957\n0.708807692927\n\n\nPretty similar to the result we got using matrix factorisation. Maybe, we need to tweak around a lot more with the neural network to get better results?\nThanks for reading. This post has been a good learning experience for me. Hope you enjoyed too!" + "text": "I have written a few posts earlier about matrix factorisation using various Python libraries. The main application I had in mind for matrix factorisation was recommender systems. In this post, I’ll write about using Keras for creating recommender systems. Various people have written excellent similar posts and code that I draw a lot of inspiration from, and give them their credit! I’m assuming that a reader has some experience with Keras, as this post is not intended to be an introduction to Keras.\nSpecifically, in this post, I’ll talk about:\n\nMatrix Factorisation in Keras\nAdding non-negativitiy constraints to solve non-negative matrix factorisation (NNMF)\nUsing neural networks for recommendations\n\nI’ll be using the Movielens-100k dataset for illustration. There are 943 users and 1682 movies. In total there are a 100k ratings in the dataset. It should be noted that the max. total number of rating for the <users, movies> would be 943*1682, which means that we have about 7% of the total ratings! All rating are on a scale of 1-5.\n\nTask\nGiven this set of ratings, can we recommend the next set of movies to a user? This would translate to: for every user, estimating the ratings for all the movies that (s)he hasn’t watched and maybe recommend the top-k movies by the esimtated ratings!\n\n\nPeak into the dataset\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport warnings\nwarnings.filterwarnings('ignore')\n\n%matplotlib inline\n\n\ndataset = pd.read_csv(\"/Users/nipun/Downloads/ml-100k/u.data\",sep='\\t',names=\"user_id,item_id,rating,timestamp\".split(\",\"))\n\n\ndataset.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n196\n242\n3\n881250949\n\n\n1\n186\n302\n3\n891717742\n\n\n2\n22\n377\n1\n878887116\n\n\n3\n244\n51\n2\n880606923\n\n\n4\n166\n346\n1\n886397596\n\n\n\n\n\n\n\nSo, each record (row) shows the rating for a user, item (movie) pair. It should be noted that I use item and movie interchangeably in this post.\n\nlen(dataset.user_id.unique()), len(dataset.item_id.unique())\n\n(943, 1682)\n\n\nWe assign a unique number between (0, #users) to each user and do the same for movies.\n\ndataset.user_id = dataset.user_id.astype('category').cat.codes.values\ndataset.item_id = dataset.item_id.astype('category').cat.codes.values\n\n\ndataset.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n0\n195\n241\n3\n881250949\n\n\n1\n185\n301\n3\n891717742\n\n\n2\n21\n376\n1\n878887116\n\n\n3\n243\n50\n2\n880606923\n\n\n4\n165\n345\n1\n886397596\n\n\n\n\n\n\n\n\n\nTrain test split\nWe’ll now split our dataset of 100k ratings into train (containing 80k ratings) and test (containing 20k ratings). Given the train set, we’d like to accurately estimate the ratings in the test set.\n\nfrom sklearn.model_selection import train_test_split\ntrain, test = train_test_split(dataset, test_size=0.2)\n\n\ntrain.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n90092\n832\n12\n2\n875036139\n\n\n50879\n94\n132\n3\n888954341\n\n\n67994\n436\n12\n4\n880141129\n\n\n49769\n710\n344\n4\n884485683\n\n\n11032\n121\n736\n4\n879270874\n\n\n\n\n\n\n\n\ntest.head()\n\n\n\n\n\n\n\n\nuser_id\nitem_id\nrating\ntimestamp\n\n\n\n\n89284\n907\n493\n3\n879723046\n\n\n60499\n550\n25\n4\n892785056\n\n\n11090\n373\n222\n5\n880394520\n\n\n36096\n199\n140\n4\n884129346\n\n\n21633\n71\n317\n5\n880037702\n\n\n\n\n\n\n\n\n\nMatrix factorisation\nOne popular recommender systems approach is called Matrix Factorisation. It works on the principle that we can learn a low-dimensional representation (embedding) of user and movie. For example, for each movie, we can have how much action it has, how long it is, and so on. For each user, we can encode how much they like action, or how much they like long movies, etc. Thus, we can combine the user and the movie embeddings to estimate the ratings on unseen movies. This approach can also be viewed as: given a matrix (A [M X N]) containing users and movies, we want to estimate low dimensional matrices (W [M X k] and H [M X k]), such that: \\(A \\approx W.H^T\\)\n\n\nMatrix factorisation in Keras\nWe’ll now write some code to solve the recommendation problem by matrix factorisation in Keras. We’re trying to learn two low-dimensional embeddings of users and items.\n\nimport keras\nfrom IPython.display import SVG\nfrom keras.optimizers import Adam\nfrom keras.utils.vis_utils import model_to_dot\nn_users, n_movies = len(dataset.user_id.unique()), len(dataset.item_id.unique())\nn_latent_factors = 3\n\nUsing TensorFlow backend.\n\n\nThe key thing is to learn an embedding for movies and users, and then combine them using the dot product! For estimating the rating, for each user, movie pair of interest, we’d take the dot product of the respective user and item embedding. As an example, if we have 2 dimensions in our user and item embedding, which say correspond to [how much user likes action, how much user likes long movies], and the item embedding is [how much action is in the movie, how long is the movie]. Then, we can predict for a user u, and movie m as how much u likes action \\(\\times\\) how much action is there in m \\(+\\) how much u likes long movies \\(\\times\\) how long is m.\nOur model would optimise the emebedding such that we minimise the mean squared error on the ratings from the train set.\n\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors, name='Movie-Embedding')(movie_input)\nmovie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors,name='User-Embedding')(user_input))\n\nprod = keras.layers.merge([movie_vec, user_vec], mode='dot',name='DotProduct')\nmodel = keras.Model([user_input, movie_input], prod)\nmodel.compile('adam', 'mean_squared_error')\n\nHere’s a visualisation of our model for a better understanding.\n\nSVG(model_to_dot(model, show_shapes=True, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))\n\n\n\n\n\n\n\n\nWe can see that in the Merge layer, we take the dot product of the user and the item embeddings to obtain the rating.\nWe can also summarise our model as follows:\n\nmodel.summary()\n\n__________________________________________________________________________________________________\nLayer (type) Output Shape Param # Connected to \n==================================================================================================\nItem (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nUser (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nMovie-Embedding (Embedding) (None, 1, 3) 5049 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding (Embedding) (None, 1, 3) 2832 User[0][0] \n__________________________________________________________________________________________________\nFlattenMovies (Flatten) (None, 3) 0 Movie-Embedding[0][0] \n__________________________________________________________________________________________________\nFlattenUsers (Flatten) (None, 3) 0 User-Embedding[0][0] \n__________________________________________________________________________________________________\nDotProduct (Merge) (None, 1) 0 FlattenMovies[0][0] \n FlattenUsers[0][0] \n==================================================================================================\nTotal params: 7,881\nTrainable params: 7,881\nNon-trainable params: 0\n__________________________________________________________________________________________________\n\n\nSo, we have 7881 parameters to learn! Let’s train our model now!\n\nhistory = model.fit([train.user_id, train.item_id], train.rating, epochs=100, verbose=0)\n\n\nTrain error v/s epoch number\nBefore we test how well our model does in the test setting, we can visualise the train loss with epoch number.\n\npd.Series(history.history['loss']).plot(logy=True)\nplt.xlabel(\"Epoch\")\nplt.ylabel(\"Train Error\")\n\n\n\n\n\n\n\n\n\n\nPrediction error\nLet’s now see how our model does! I’ll do a small post-processing step to round off our prediction to the nearest integer. This is usually not done, and thus just a whimsical step, since the training ratings are all integers! There are better ways to encode this intger requirement (one-hot encoding!), but we won’t discuss them in this post.\n\ny_hat = np.round(model.predict([test.user_id, test.item_id]),0)\ny_true = test.rating\n\n\nfrom sklearn.metrics import mean_absolute_error\nmean_absolute_error(y_true, y_hat)\n\n0.6915\n\n\nNot bad! We’re able to get a \\(MAE\\) of 0.69! I’m sure with a bit of parameter/hyper-parameter optimisation, we may be able to improve the results. However, I won’t talk about these optimisations in this post.\n\n\nExtracting the learnt embeddings\nWe can extract the learnt movie and item embeddings as follows:\n\nmovie_embedding_learnt = model.get_layer(name='Movie-Embedding').get_weights()[0]\npd.DataFrame(movie_embedding_learnt).describe()\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\ncount\n1683.000000\n1683.000000\n1683.000000\n\n\nmean\n-0.935420\n0.857862\n0.954169\n\n\nstd\n0.517458\n0.447439\n0.458095\n\n\nmin\n-2.524487\n-0.459752\n-0.989537\n\n\n25%\n-1.323431\n0.546364\n0.642444\n\n\n50%\n-0.949188\n0.851243\n0.993619\n\n\n75%\n-0.550862\n1.159588\n1.283555\n\n\nmax\n0.500618\n2.140607\n2.683658\n\n\n\n\n\n\n\n\nuser_embedding_learnt = model.get_layer(name='User-Embedding').get_weights()[0]\npd.DataFrame(user_embedding_learnt).describe()\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\ncount\n944.000000\n944.000000\n944.000000\n\n\nmean\n-1.126231\n1.171609\n1.109131\n\n\nstd\n0.517478\n0.409016\n0.548384\n\n\nmin\n-2.883226\n-0.500010\n-0.415373\n\n\n25%\n-1.458197\n0.903574\n0.735729\n\n\n50%\n-1.159480\n1.199517\n1.084089\n\n\n75%\n-0.836746\n1.456610\n1.468611\n\n\nmax\n0.899436\n2.605330\n2.826109\n\n\n\n\n\n\n\nWe can see that both the user and the item embeddings have negative elements. There are some applications which require that the learnt embeddings be non-negative. This approach is also called non-negative matrix factorisation, which we’ll workout now.\n\n\n\nNon-negative Matrix factorisation (NNMF) in Keras\nThe code for NNMF remains exactly the same as the code for matrix factorisation. The only change is that we add non-negativity constraints on the learnt embeddings. This is done as follows:\n\nfrom keras.constraints import non_neg\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors, name='NonNegMovie-Embedding', embeddings_constraint=non_neg())(movie_input)\nmovie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors,name='NonNegUser-Embedding',embeddings_constraint=non_neg())(user_input))\n\nprod = keras.layers.merge([movie_vec, user_vec], mode='dot',name='DotProduct')\nmodel = keras.Model([user_input, movie_input], prod)\nmodel.compile('adam', 'mean_squared_error')\n\nWe now verify if we are indeed able to learn non-negative embeddings. I’ll not compare the performance of NNMF on the test set, in the interest of space.\n\nhistory_nonneg = model.fit([train.user_id, train.item_id], train.rating, epochs=10, verbose=0)\n\n\nmovie_embedding_learnt = model.get_layer(name='NonNegMovie-Embedding').get_weights()[0]\npd.DataFrame(movie_embedding_learnt).describe()\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\ncount\n1683.000000\n1683.000000\n1683.000000\n\n\nmean\n0.838450\n0.840330\n0.838066\n\n\nstd\n0.301618\n0.301529\n0.301040\n\n\nmin\n-0.000000\n-0.000000\n-0.000000\n\n\n25%\n0.657749\n0.663951\n0.656453\n\n\n50%\n0.901495\n0.904192\n0.895934\n\n\n75%\n1.072706\n1.073591\n1.072926\n\n\nmax\n1.365719\n1.379006\n1.373672\n\n\n\n\n\n\n\nLooks good!\n\n\nNeural networks for recommendation\nWe’ll now create a simple neural network for recommendation, or for estimating rating! This model is very similar to the earlier matrix factorisation models, but differs in the following ways:\n\nInstead of taking a dot product of the user and the item embedding, we concatenate them and use them as features for our neural network. Thus, we are not constrained to the dot product way of combining the embeddings, and can learn complex non-linear relationships.\nDue to #1, we can now have a different dimension of user and item embeddings. This can be useful if one dimension is larger than the other.\n\n\nn_latent_factors_user = 5\nn_latent_factors_movie = 8\n\nmovie_input = keras.layers.Input(shape=[1],name='Item')\nmovie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors_movie, name='Movie-Embedding')(movie_input)\nmovie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding)\nmovie_vec = keras.layers.Dropout(0.2)(movie_vec)\n\n\nuser_input = keras.layers.Input(shape=[1],name='User')\nuser_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors_user,name='User-Embedding')(user_input))\nuser_vec = keras.layers.Dropout(0.2)(user_vec)\n\n\nconcat = keras.layers.merge([movie_vec, user_vec], mode='concat',name='Concat')\nconcat_dropout = keras.layers.Dropout(0.2)(concat)\ndense = keras.layers.Dense(200,name='FullyConnected')(concat)\ndropout_1 = keras.layers.Dropout(0.2,name='Dropout')(dense)\ndense_2 = keras.layers.Dense(100,name='FullyConnected-1')(concat)\ndropout_2 = keras.layers.Dropout(0.2,name='Dropout')(dense_2)\ndense_3 = keras.layers.Dense(50,name='FullyConnected-2')(dense_2)\ndropout_3 = keras.layers.Dropout(0.2,name='Dropout')(dense_3)\ndense_4 = keras.layers.Dense(20,name='FullyConnected-3', activation='relu')(dense_3)\n\n\nresult = keras.layers.Dense(1, activation='relu',name='Activation')(dense_4)\nadam = Adam(lr=0.005)\nmodel = keras.Model([user_input, movie_input], result)\nmodel.compile(optimizer=adam,loss= 'mean_absolute_error')\n\nLet’s now see how our model looks like:\n\nSVG(model_to_dot(model, show_shapes=True, show_layer_names=True, rankdir='HB').create(prog='dot', format='svg'))\n\n\n\n\n\n\n\n\nIt should be noted that we use a different number of embeddings for user (3) and items (5)! These combine to form a vector of length (5+3 = 8), which is then fed into the neural network. We also add a dropout layer to prevent overfitting!\n\nmodel.summary()\n\n__________________________________________________________________________________________________\nLayer (type) Output Shape Param # Connected to \n==================================================================================================\nItem (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nUser (InputLayer) (None, 1) 0 \n__________________________________________________________________________________________________\nMovie-Embedding (Embedding) (None, 1, 8) 13464 Item[0][0] \n__________________________________________________________________________________________________\nUser-Embedding (Embedding) (None, 1, 5) 4720 User[0][0] \n__________________________________________________________________________________________________\nFlattenMovies (Flatten) (None, 8) 0 Movie-Embedding[0][0] \n__________________________________________________________________________________________________\nFlattenUsers (Flatten) (None, 5) 0 User-Embedding[0][0] \n__________________________________________________________________________________________________\ndropout_1 (Dropout) (None, 8) 0 FlattenMovies[0][0] \n__________________________________________________________________________________________________\ndropout_2 (Dropout) (None, 5) 0 FlattenUsers[0][0] \n__________________________________________________________________________________________________\nConcat (Merge) (None, 13) 0 dropout_1[0][0] \n dropout_2[0][0] \n__________________________________________________________________________________________________\nFullyConnected-1 (Dense) (None, 100) 1400 Concat[0][0] \n__________________________________________________________________________________________________\nFullyConnected-2 (Dense) (None, 50) 5050 FullyConnected-1[0][0] \n__________________________________________________________________________________________________\nFullyConnected-3 (Dense) (None, 20) 1020 FullyConnected-2[0][0] \n__________________________________________________________________________________________________\nActivation (Dense) (None, 1) 21 FullyConnected-3[0][0] \n==================================================================================================\nTotal params: 25,675\nTrainable params: 25,675\nNon-trainable params: 0\n__________________________________________________________________________________________________\n\n\nWe can see that the number of parameters is more than what we had in the Matrix Factorisation case. Let’s see how this model works. I’ll run it for more epochs given that we have more parameters.\n\nhistory = model.fit([train.user_id, train.item_id], train.rating, epochs=250, verbose=0)\n\n\nPrediction performance of Neural Network based recommender system\n\ny_hat_2 = np.round(model.predict([test.user_id, test.item_id]),0)\nprint(mean_absolute_error(y_true, y_hat_2))\n\nprint(mean_absolute_error(y_true, model.predict([test.user_id, test.item_id])))\n\n\n0.6957\n0.708807692927\n\n\nPretty similar to the result we got using matrix factorisation. Maybe, we need to tweak around a lot more with the neural network to get better results?\nThanks for reading. This post has been a good learning experience for me. Hope you enjoyed too!" }, { "objectID": "posts/comparing-gp.html", @@ -144,14 +144,14 @@ "href": "posts/2018-06-21-aq-india-map.html", "title": "Mapping location of air quality sensing in India", "section": "", - "text": "In this notebook, I’ll show a quick example of how to use Folium (which internally uses LeafletJS) for visualising the location of air quality monitors in India. The purpose of this notebook is eductional in nature.\n\nStandard Imports\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n%matplotlib inline\n\n\n\nDownloading data from OpenAQ for 2018-04-06\n\n!wget --no-check-certificate https://openaq-data.s3.amazonaws.com/2018-04-06.csv -P /Users/nipun/Downloads/\n\n--2020-02-29 17:52:50-- https://openaq-data.s3.amazonaws.com/2018-04-06.csv\nResolving openaq-data.s3.amazonaws.com (openaq-data.s3.amazonaws.com)... 52.216.99.123\nConnecting to openaq-data.s3.amazonaws.com (openaq-data.s3.amazonaws.com)|52.216.99.123|:443... connected.\nWARNING: cannot verify openaq-data.s3.amazonaws.com's certificate, issued by ‘CN=DigiCert Baltimore CA-2 G2,OU=www.digicert.com,O=DigiCert Inc,C=US’:\n Unable to locally verify the issuer's authority.\nHTTP request sent, awaiting response... 200 OK\nLength: 133839107 (128M) [text/csv]\nSaving to: ‘/Users/nipun/Downloads/2018-04-06.csv.1’\n\n2018-04-06.csv.1 37%[======> ] 47.37M 3.79MB/s eta 40s ^C\n\n\n\nimport pandas as pd\ndf = pd.read_csv(\"/Users/nipun/Downloads/2018-04-06.csv\")\ndf = df[(df.country=='IN')&(df.parameter=='pm25')].dropna().groupby(\"location\").mean()\n\n\ndf\n\n\n\n\n\n\n\n\n\nvalue\nlatitude\nlongitude\n\n\nlocation\n\n\n\n\n\n\n\nAdarsh Nagar, Jaipur - RSPCB\n79.916667\n26.902909\n75.836853\n\n\nAnand Kala Kshetram, Rajamahendravaram - APPCB\n42.750000\n16.987287\n81.736318\n\n\nArdhali Bazar, Varanasi - UPPCB\n103.666667\n25.350599\n82.908307\n\n\nAsanol Court Area, Asanol - WBPCB\n56.833333\n23.685297\n86.945968\n\n\nAshok Nagar, Udaipur - RSPCB\n114.750000\n24.588617\n73.632140\n\n\n...\n...\n...\n...\n\n\nVasundhara, Ghaziabad, UP - UPPCB\n223.333333\n28.660335\n77.357256\n\n\nVikas Sadan, Gurgaon, Haryana - HSPCB\n280.250000\n28.450124\n77.026305\n\n\nVindhyachal STPS, Singrauli - MPPCB\n144.000000\n24.108970\n82.645580\n\n\nWard-32 Bapupara, Siliguri - WBPCB\n195.000000\n26.688305\n88.412668\n\n\nZoo Park, Hyderabad - TSPCB\n82.500000\n17.349694\n78.451437\n\n\n\n\n79 rows × 3 columns\n\n\n\n\n\n\nDownloading World GeoJson file\n\n!wget --no-check-certificate https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json\n\n--2020-02-29 17:53:17-- https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.8.133\nConnecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.8.133|:443... connected.\nWARNING: cannot verify raw.githubusercontent.com's certificate, issued by ‘CN=DigiCert SHA2 High Assurance Server CA,OU=www.digicert.com,O=DigiCert Inc,C=US’:\n Unable to locally verify the issuer's authority.\nHTTP request sent, awaiting response... 200 OK\nLength: 252515 (247K) [text/plain]\nSaving to: ‘world-countries.json’\n\nworld-countries.jso 100%[===================>] 246.60K 376KB/s in 0.7s \n\n2020-02-29 17:53:19 (376 KB/s) - ‘world-countries.json’ saved [252515/252515]\n\n\n\n\n\nCreating india.json correspdonding to Indian data\n\nimport json\ne = json.load(open('world-countries.json','r'))\njson.dump(e['features'][73], open('india.json','w'))\n\n\nimport folium\n\nfolium_map = folium.Map(width = '60%',height=800,location=[20, 77],\n zoom_start=5,\n tiles=\"Stamen Terrain\",min_lat=7, max_lat=35, min_lon=73, max_lon=90)\nfor x in df.iterrows():\n name = x[0]\n lat, lon = x[1]['latitude'], x[1]['longitude']\n folium.CircleMarker([lat, lon], radius=5, color='#000000',fill_color='#D3D3D3' , fill_opacity=1).add_to(folium_map)\n\nfolium.GeoJson('india.json').add_to(folium_map)\n\n<folium.features.GeoJson at 0x11e497bd0>\n\n\n\nfolium_map.save(\"map.html\")\n\n\nThere you go!Till next time." + "text": "In this notebook, I’ll show a quick example of how to use Folium (which internally uses LeafletJS) for visualising the location of air quality monitors in India. The purpose of this notebook is eductional in nature.\n\nStandard Imports\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n%matplotlib inline\n\n\n\nDownloading data from OpenAQ for 2018-04-06\n\n!wget --no-check-certificate https://openaq-data.s3.amazonaws.com/2018-04-06.csv -P /Users/nipun/Downloads/\n\n--2020-02-29 17:52:50-- https://openaq-data.s3.amazonaws.com/2018-04-06.csv\nResolving openaq-data.s3.amazonaws.com (openaq-data.s3.amazonaws.com)... 52.216.99.123\nConnecting to openaq-data.s3.amazonaws.com (openaq-data.s3.amazonaws.com)|52.216.99.123|:443... connected.\nWARNING: cannot verify openaq-data.s3.amazonaws.com's certificate, issued by ‘CN=DigiCert Baltimore CA-2 G2,OU=www.digicert.com,O=DigiCert Inc,C=US’:\n Unable to locally verify the issuer's authority.\nHTTP request sent, awaiting response... 200 OK\nLength: 133839107 (128M) [text/csv]\nSaving to: ‘/Users/nipun/Downloads/2018-04-06.csv.1’\n\n2018-04-06.csv.1 37%[======> ] 47.37M 3.79MB/s eta 40s ^C\n\n\n\nimport pandas as pd\ndf = pd.read_csv(\"/Users/nipun/Downloads/2018-04-06.csv\")\ndf = df[(df.country=='IN')&(df.parameter=='pm25')].dropna().groupby(\"location\").mean()\n\n\ndf\n\n\n\n\n\n\n\n\nvalue\nlatitude\nlongitude\n\n\nlocation\n\n\n\n\n\n\n\nAdarsh Nagar, Jaipur - RSPCB\n79.916667\n26.902909\n75.836853\n\n\nAnand Kala Kshetram, Rajamahendravaram - APPCB\n42.750000\n16.987287\n81.736318\n\n\nArdhali Bazar, Varanasi - UPPCB\n103.666667\n25.350599\n82.908307\n\n\nAsanol Court Area, Asanol - WBPCB\n56.833333\n23.685297\n86.945968\n\n\nAshok Nagar, Udaipur - RSPCB\n114.750000\n24.588617\n73.632140\n\n\n...\n...\n...\n...\n\n\nVasundhara, Ghaziabad, UP - UPPCB\n223.333333\n28.660335\n77.357256\n\n\nVikas Sadan, Gurgaon, Haryana - HSPCB\n280.250000\n28.450124\n77.026305\n\n\nVindhyachal STPS, Singrauli - MPPCB\n144.000000\n24.108970\n82.645580\n\n\nWard-32 Bapupara, Siliguri - WBPCB\n195.000000\n26.688305\n88.412668\n\n\nZoo Park, Hyderabad - TSPCB\n82.500000\n17.349694\n78.451437\n\n\n\n\n79 rows × 3 columns\n\n\n\n\n\nDownloading World GeoJson file\n\n!wget --no-check-certificate https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json\n\n--2020-02-29 17:53:17-- https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.8.133\nConnecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.8.133|:443... connected.\nWARNING: cannot verify raw.githubusercontent.com's certificate, issued by ‘CN=DigiCert SHA2 High Assurance Server CA,OU=www.digicert.com,O=DigiCert Inc,C=US’:\n Unable to locally verify the issuer's authority.\nHTTP request sent, awaiting response... 200 OK\nLength: 252515 (247K) [text/plain]\nSaving to: ‘world-countries.json’\n\nworld-countries.jso 100%[===================>] 246.60K 376KB/s in 0.7s \n\n2020-02-29 17:53:19 (376 KB/s) - ‘world-countries.json’ saved [252515/252515]\n\n\n\n\n\nCreating india.json correspdonding to Indian data\n\nimport json\ne = json.load(open('world-countries.json','r'))\njson.dump(e['features'][73], open('india.json','w'))\n\n\nimport folium\n\nfolium_map = folium.Map(width = '60%',height=800,location=[20, 77],\n zoom_start=5,\n tiles=\"Stamen Terrain\",min_lat=7, max_lat=35, min_lon=73, max_lon=90)\nfor x in df.iterrows():\n name = x[0]\n lat, lon = x[1]['latitude'], x[1]['longitude']\n folium.CircleMarker([lat, lon], radius=5, color='#000000',fill_color='#D3D3D3' , fill_opacity=1).add_to(folium_map)\n\nfolium.GeoJson('india.json').add_to(folium_map)\n\n<folium.features.GeoJson at 0x11e497bd0>\n\n\n\nfolium_map.save(\"map.html\")\n\n\nThere you go!Till next time." }, { "objectID": "posts/2017-04-21-constrained-nmf-cvx.html", "href": "posts/2017-04-21-constrained-nmf-cvx.html", "title": "Constrained Non-negative matrix factorisation using CVXPY", "section": "", - "text": "In a previous post, we saw how we can use CVXPY to perform non-negative matrix factorisation. In this post, I’ll show how to add additional constraints that may arise from the problem domain. As a trivial example, I’ll take constraints of the form when there is a less-than relationship among members of the matrix. For example, we may want to enforce certain movies to be always rated more than others? We’ll create a matrix of 30 users and 12 items. We will enforce the contraint that the rating of the first 6 items be atleast twice that of the last 6 items.\n\nCreating a ratings matrix\nWe will now create a matrix where the relationship among items exists.\n\nimport numpy as np\nimport pandas as pd\n\n\nK, N, M = 2, 12, 30\nY_gen = np.random.rand(M, K)\nX_1 = np.random.rand(K, N/2)\n# So that atleast twice as much\nX_2 = 2* X_1 + np.random.rand(K, N/2)\nX_gen = np.hstack([X_2, X_1])\n# Normalizing\nX_gen = X_gen/np.max(X_gen)\n# Creating A (ratings matrix of size M, N)\nA = np.dot(Y_gen, X_gen)\npd.DataFrame(A).head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.732046\n0.613565\n0.961128\n0.920089\n0.244323\n0.506472\n0.280477\n0.251049\n0.324418\n0.378219\n0.075556\n0.131750\n\n\n1\n0.903630\n0.340956\n0.784109\n0.919741\n0.190856\n0.433635\n0.321932\n0.135134\n0.290862\n0.394680\n0.052976\n0.081148\n\n\n2\n0.972145\n0.576558\n1.046197\n1.098279\n0.261103\n0.562996\n0.358574\n0.233405\n0.368118\n0.460967\n0.077286\n0.128344\n\n\n3\n0.292231\n0.263864\n0.401968\n0.377116\n0.102567\n0.210890\n0.113070\n0.108163\n0.134489\n0.154266\n0.031993\n0.056299\n\n\n4\n0.694038\n0.803459\n1.125454\n0.987344\n0.290605\n0.582178\n0.278848\n0.331075\n0.365935\n0.397023\n0.093088\n0.168300\n\n\n\n\n\n\n\n\nWe can see that for each user, the 0th item has higher rating compared to the 5th, 1st more than the 6th and so on. Now, in our alternating least squares implementation, we break down A as Y.X. Here X has dimensions of K, N. To ensure the relationship among the items, we will put contraints on X of the form: X[:, 0] > 2 x X[:, 5] and so on. We will create a simple for loop for the same.\n\ne = \"[\"\nfor a in range(N/2):\n e+=\"X[:,%d] > 2 * X[:,%d],\" %(a, a+N/2)\ne = e[:-1] + \"]\"\ne\n\n'[X[:,0] > 2 * X[:,6],X[:,1] > 2 * X[:,7],X[:,2] > 2 * X[:,8],X[:,3] > 2 * X[:,9],X[:,4] > 2 * X[:,10],X[:,5] > 2 * X[:,11]]'\n\n\nAs we can see, we now have 6 constraints that we can feed into the optimisation routine. Whenever we learn X in the ALS, we apply these constraint.\n\n\nCVX routine for handling input constraints\n\ndef nmf_features(A, k, MAX_ITERS=30, input_constraints_X=None, input_constraints_Y = None):\n import cvxpy as cvx\n np.random.seed(0)\n\n # Generate random data matrix A.\n m, n = A.shape\n mask = ~np.isnan(A)\n\n # Initialize Y randomly.\n Y_init = np.random.rand(m, k)\n Y = Y_init\n\n # Perform alternating minimization.\n\n residual = np.zeros(MAX_ITERS)\n for iter_num in xrange(1, 1 + MAX_ITERS):\n \n # For odd iterations, treat Y constant, optimize over X.\n if iter_num % 2 == 1:\n X = cvx.Variable(k, n)\n constraint = [X >= 0]\n if input_constraints_X:\n constraint.extend(eval(input_constraints_X))\n\n # For even iterations, treat X constant, optimize over Y.\n else:\n Y = cvx.Variable(m, k)\n constraint = [Y >= 0]\n \n\n Temp = Y * X\n error = A[mask] - (Y * X)[mask]\n \n \n obj = cvx.Minimize(cvx.norm(error, 'fro'))\n \n\n prob = cvx.Problem(obj, constraint)\n prob.solve(solver=cvx.SCS)\n\n if prob.status != cvx.OPTIMAL:\n pass\n \n residual[iter_num - 1] = prob.value\n \n if iter_num % 2 == 1:\n X = X.value\n else:\n Y = Y.value\n return X, Y, residual\n\n\n# Without constraints\nX, Y, r = nmf_features(A, 3, MAX_ITERS=20)\n# With contstraints\nX_c, Y_c, r_c = nmf_features(A, 3, MAX_ITERS=20, input_constraints_X=e)\n\n\npd.DataFrame(X)\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.749994\n0.112355\n0.485850\n0.674801\n0.113004\n0.281371\n0.257239\n0.04056\n0.196474\n0.297978\n0.02745\n0.033952\n\n\n1\n0.102384\n0.222149\n0.266055\n0.199361\n0.070403\n0.133510\n0.047174\n0.09233\n0.081233\n0.076518\n0.02375\n0.045097\n\n\n2\n0.567213\n0.558638\n0.825066\n0.756059\n0.211427\n0.430690\n0.222174\n0.22944\n0.273260\n0.307475\n0.06659\n0.118371\n\n\n\n\n\n\n\n\n\npd.DataFrame(X_c)\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.749882\n0.112384\n0.485923\n0.674778\n0.113027\n0.281399\n0.257206\n0.040566\n0.196489\n0.297960\n0.027461\n0.033971\n\n\n1\n0.102366\n0.222080\n0.266058\n0.199353\n0.070404\n0.133511\n0.047168\n0.092298\n0.081233\n0.076513\n0.023751\n0.045091\n\n\n2\n0.567363\n0.558700\n0.825253\n0.756242\n0.211473\n0.430789\n0.222234\n0.229470\n0.273319\n0.307549\n0.066604\n0.118382\n\n\n\n\n\n\n\n\nOk. The obtained X matrix looks fairly similar. How about we reverse the constraints.\n\ne_rev = \"[\"\nfor a in range(N/2):\n e_rev+=\" 2* X[:,%d] < X[:,%d],\" %(a, a+N/2)\ne_rev = e_rev[:-1] + \"]\"\ne_rev\n\n'[ 2* X[:,0] < X[:,6], 2* X[:,1] < X[:,7], 2* X[:,2] < X[:,8], 2* X[:,3] < X[:,9], 2* X[:,4] < X[:,10], 2* X[:,5] < X[:,11]]'\n\n\n\nX_c_rev, Y_c_rev, r_c_rev = nmf_features(A, 3, MAX_ITERS=20, input_constraints_X=e_rev)\n\n\npd.DataFrame(X_c_rev)\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.250945\n0.038070\n0.174189\n0.252085\n0.033251\n0.069176\n0.502026\n0.076147\n0.348450\n0.504277\n0.066521\n0.138405\n\n\n1\n0.030757\n0.088033\n0.085947\n0.065135\n0.024395\n0.045976\n0.061398\n0.176002\n0.171773\n0.130146\n0.048760\n0.091882\n\n\n2\n0.220256\n0.183292\n0.269014\n0.282814\n0.065713\n0.128120\n0.440553\n0.366600\n0.538065\n0.565669\n0.131436\n0.256263\n\n\n\n\n\n\n\n\nThere you go! We now have learnt latent factors that conform to our constraints." + "text": "In a previous post, we saw how we can use CVXPY to perform non-negative matrix factorisation. In this post, I’ll show how to add additional constraints that may arise from the problem domain. As a trivial example, I’ll take constraints of the form when there is a less-than relationship among members of the matrix. For example, we may want to enforce certain movies to be always rated more than others? We’ll create a matrix of 30 users and 12 items. We will enforce the contraint that the rating of the first 6 items be atleast twice that of the last 6 items.\n\nCreating a ratings matrix\nWe will now create a matrix where the relationship among items exists.\n\nimport numpy as np\nimport pandas as pd\n\n\nK, N, M = 2, 12, 30\nY_gen = np.random.rand(M, K)\nX_1 = np.random.rand(K, N/2)\n# So that atleast twice as much\nX_2 = 2* X_1 + np.random.rand(K, N/2)\nX_gen = np.hstack([X_2, X_1])\n# Normalizing\nX_gen = X_gen/np.max(X_gen)\n# Creating A (ratings matrix of size M, N)\nA = np.dot(Y_gen, X_gen)\npd.DataFrame(A).head()\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.732046\n0.613565\n0.961128\n0.920089\n0.244323\n0.506472\n0.280477\n0.251049\n0.324418\n0.378219\n0.075556\n0.131750\n\n\n1\n0.903630\n0.340956\n0.784109\n0.919741\n0.190856\n0.433635\n0.321932\n0.135134\n0.290862\n0.394680\n0.052976\n0.081148\n\n\n2\n0.972145\n0.576558\n1.046197\n1.098279\n0.261103\n0.562996\n0.358574\n0.233405\n0.368118\n0.460967\n0.077286\n0.128344\n\n\n3\n0.292231\n0.263864\n0.401968\n0.377116\n0.102567\n0.210890\n0.113070\n0.108163\n0.134489\n0.154266\n0.031993\n0.056299\n\n\n4\n0.694038\n0.803459\n1.125454\n0.987344\n0.290605\n0.582178\n0.278848\n0.331075\n0.365935\n0.397023\n0.093088\n0.168300\n\n\n\n\n\n\n\nWe can see that for each user, the 0th item has higher rating compared to the 5th, 1st more than the 6th and so on. Now, in our alternating least squares implementation, we break down A as Y.X. Here X has dimensions of K, N. To ensure the relationship among the items, we will put contraints on X of the form: X[:, 0] > 2 x X[:, 5] and so on. We will create a simple for loop for the same.\n\ne = \"[\"\nfor a in range(N/2):\n e+=\"X[:,%d] > 2 * X[:,%d],\" %(a, a+N/2)\ne = e[:-1] + \"]\"\ne\n\n'[X[:,0] > 2 * X[:,6],X[:,1] > 2 * X[:,7],X[:,2] > 2 * X[:,8],X[:,3] > 2 * X[:,9],X[:,4] > 2 * X[:,10],X[:,5] > 2 * X[:,11]]'\n\n\nAs we can see, we now have 6 constraints that we can feed into the optimisation routine. Whenever we learn X in the ALS, we apply these constraint.\n\n\nCVX routine for handling input constraints\n\ndef nmf_features(A, k, MAX_ITERS=30, input_constraints_X=None, input_constraints_Y = None):\n import cvxpy as cvx\n np.random.seed(0)\n\n # Generate random data matrix A.\n m, n = A.shape\n mask = ~np.isnan(A)\n\n # Initialize Y randomly.\n Y_init = np.random.rand(m, k)\n Y = Y_init\n\n # Perform alternating minimization.\n\n residual = np.zeros(MAX_ITERS)\n for iter_num in xrange(1, 1 + MAX_ITERS):\n \n # For odd iterations, treat Y constant, optimize over X.\n if iter_num % 2 == 1:\n X = cvx.Variable(k, n)\n constraint = [X >= 0]\n if input_constraints_X:\n constraint.extend(eval(input_constraints_X))\n\n # For even iterations, treat X constant, optimize over Y.\n else:\n Y = cvx.Variable(m, k)\n constraint = [Y >= 0]\n \n\n Temp = Y * X\n error = A[mask] - (Y * X)[mask]\n \n \n obj = cvx.Minimize(cvx.norm(error, 'fro'))\n \n\n prob = cvx.Problem(obj, constraint)\n prob.solve(solver=cvx.SCS)\n\n if prob.status != cvx.OPTIMAL:\n pass\n \n residual[iter_num - 1] = prob.value\n \n if iter_num % 2 == 1:\n X = X.value\n else:\n Y = Y.value\n return X, Y, residual\n\n\n# Without constraints\nX, Y, r = nmf_features(A, 3, MAX_ITERS=20)\n# With contstraints\nX_c, Y_c, r_c = nmf_features(A, 3, MAX_ITERS=20, input_constraints_X=e)\n\n\npd.DataFrame(X)\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.749994\n0.112355\n0.485850\n0.674801\n0.113004\n0.281371\n0.257239\n0.04056\n0.196474\n0.297978\n0.02745\n0.033952\n\n\n1\n0.102384\n0.222149\n0.266055\n0.199361\n0.070403\n0.133510\n0.047174\n0.09233\n0.081233\n0.076518\n0.02375\n0.045097\n\n\n2\n0.567213\n0.558638\n0.825066\n0.756059\n0.211427\n0.430690\n0.222174\n0.22944\n0.273260\n0.307475\n0.06659\n0.118371\n\n\n\n\n\n\n\n\npd.DataFrame(X_c)\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.749882\n0.112384\n0.485923\n0.674778\n0.113027\n0.281399\n0.257206\n0.040566\n0.196489\n0.297960\n0.027461\n0.033971\n\n\n1\n0.102366\n0.222080\n0.266058\n0.199353\n0.070404\n0.133511\n0.047168\n0.092298\n0.081233\n0.076513\n0.023751\n0.045091\n\n\n2\n0.567363\n0.558700\n0.825253\n0.756242\n0.211473\n0.430789\n0.222234\n0.229470\n0.273319\n0.307549\n0.066604\n0.118382\n\n\n\n\n\n\n\nOk. The obtained X matrix looks fairly similar. How about we reverse the constraints.\n\ne_rev = \"[\"\nfor a in range(N/2):\n e_rev+=\" 2* X[:,%d] < X[:,%d],\" %(a, a+N/2)\ne_rev = e_rev[:-1] + \"]\"\ne_rev\n\n'[ 2* X[:,0] < X[:,6], 2* X[:,1] < X[:,7], 2* X[:,2] < X[:,8], 2* X[:,3] < X[:,9], 2* X[:,4] < X[:,10], 2* X[:,5] < X[:,11]]'\n\n\n\nX_c_rev, Y_c_rev, r_c_rev = nmf_features(A, 3, MAX_ITERS=20, input_constraints_X=e_rev)\n\n\npd.DataFrame(X_c_rev)\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n\n\n\n\n0\n0.250945\n0.038070\n0.174189\n0.252085\n0.033251\n0.069176\n0.502026\n0.076147\n0.348450\n0.504277\n0.066521\n0.138405\n\n\n1\n0.030757\n0.088033\n0.085947\n0.065135\n0.024395\n0.045976\n0.061398\n0.176002\n0.171773\n0.130146\n0.048760\n0.091882\n\n\n2\n0.220256\n0.183292\n0.269014\n0.282814\n0.065713\n0.128120\n0.440553\n0.366600\n0.538065\n0.565669\n0.131436\n0.256263\n\n\n\n\n\n\n\nThere you go! We now have learnt latent factors that conform to our constraints." }, { "objectID": "posts/2022-01-29-kl-divergence.html", @@ -179,7 +179,7 @@ "href": "posts/2024-rnn.html", "title": "RNN", "section": "", - "text": "import numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format = 'retina'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom einops import rearrange, reduce, repeat\n\n\n!wget https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt -O names-indian.txt\n\n--2024-05-30 09:41:48-- https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...\nConnecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 8752 (8.5K) [text/plain]\nSaving to: ‘names-indian.txt’\n\nnames-indian.txt 100%[===================>] 8.55K --.-KB/s in 0s \n\n2024-05-30 09:41:49 (33.8 MB/s) - ‘names-indian.txt’ saved [8752/8752]\n\n\n\n\nimport pandas as pd\npd.read_csv('names-indian.txt', header=None)\n\n\n\n\n\n\n\n\n\n0\n\n\n\n\n0\nAbhishek\n\n\n1\nAman\n\n\n2\nHarsh\n\n\n3\nAyush\n\n\n4\nAditi\n\n\n...\n...\n\n\n1160\nPrasoon\n\n\n1161\nMadhusudan\n\n\n1162\nPrastuti\n\n\n1163\nRampratap\n\n\n1164\nMadhukar\n\n\n\n\n1165 rows × 1 columns\n\n\n\n\n\n# convert all names to lowercase\nnames = pd.read_csv('names-indian.txt', header=None)[0].str.lower().values\n\n\nnames\n\narray(['abhishek', 'aman', 'harsh', ..., 'prastuti', 'rampratap',\n 'madhukar'], dtype=object)\n\n\n\n# KDE plot of name lengths\nplt.figure(figsize=(8, 4))\nplt.hist([len(name) for name in names], bins=range(1, 20), density=True, alpha=0.7)\nplt.xlabel('Name length')\nplt.ylabel('Density')\n\nText(0, 0.5, 'Density')\n\n\n\n\n\n\n\n\n\n\n# Attach START and END tokens to each name. Need to add these two to the vocabulary.\nstart_symbol = '^'\nend_symbol = '$'\n\nnames = [start_symbol + name + end_symbol for name in names]\nnames[:5]\n\n['^abhishek$', '^aman$', '^harsh$', '^ayush$', '^aditi$']\n\n\n\n# Find unique characters in the dataset\nvocab = set(''.join(names))\nvocab = sorted(vocab)\nprint(vocab, len(vocab))\n\n['$', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 28\n\n\n\n# Create a d dimensional lookup table for each character in the vocabulary\nclass CharTable:\n def __init__(self, vocab):\n self.vocab = vocab\n self.char2index = {c: i for i, c in enumerate(vocab)}\n self.index2char = {i: c for i, c in enumerate(vocab)}\n self.vocab_size = len(vocab)\n \n def encode(self, name):\n return torch.tensor([self.char2index[c] for c in name])\n \n def decode(self, tensor):\n if type(tensor) == torch.Tensor:\n tensor = tensor.cpu().numpy()\n return ''.join([self.index2char[i] for i in tensor])\n\n\nct = CharTable(vocab)\n\n\nct.encode('^'), ct.encode('$'), ct.encode('a'), ct.encode('z'), ct.encode('ab'), ct.encode('za')\n\n(tensor([1]),\n tensor([0]),\n tensor([2]),\n tensor([27]),\n tensor([2, 3]),\n tensor([27, 2]))\n\n\n\nct.decode([1]), ct.decode(torch.tensor([1])), ct.decode(torch.tensor([1, 2, 3]))\n\n('^', '^', '^ab')\n\n\n\n# create embedding layer\nclass CharEmbedding(nn.Module):\n def __init__(self, vocab_size, embed_size):\n super(CharEmbedding, self).__init__()\n self.embedding = nn.Embedding(vocab_size, embed_size)\n \n def forward(self, x):\n return self.embedding(x)\n\n\nchar_embedding = CharEmbedding(ct.vocab_size, 2)\n\n\ndef plot_2d_embeddings(embedding, vocab):\n plt.figure(figsize=(4, 4))\n for i, char in enumerate(vocab):\n tensor = ct.encode(char)\n embedding = char_embedding(tensor)\n plt.scatter(embedding[0, 0].item(), embedding[0, 1].item())\n plt.text(embedding[0, 0].item(), embedding[0, 1].item(), char)\n plt.xlabel('Dimension 1')\n plt.ylabel('Dimension 2')\n\nplot_2d_embeddings(char_embedding, vocab)\n\n\n\n\n\n\n\n\n\nimport torch.nn.functional as F\n\nclass RNN(nn.Module):\n def __init__(self, input_size, hidden_size, output_size):\n super(RNN, self).__init__()\n\n self.hidden_size = hidden_size\n\n self.i2h = nn.Linear(input_size, hidden_size)\n self.h2h = nn.Linear(hidden_size, hidden_size)\n self.h2o = nn.Linear(hidden_size, output_size)\n self.softmax = nn.LogSoftmax(dim=1)\n\n def forward(self, input, hidden):\n hidden = F.tanh(self.i2h(input) + self.h2h(hidden))\n output = self.h2o(hidden)\n output = self.softmax(output)\n return output, hidden\n\n def init_hidden(self):\n return torch.zeros(1, self.hidden_size)\n\n\nrnn = RNN(2, 128, ct.vocab_size)\n\n\n# Predict the next character given the current character\ncurrent_char = \"a\"\nprint(\"Current character:\", current_char)\n# convert to tensor\ncurrent_tensor = ct.encode(current_char)\nprint(\"Curent tensor:\", current_tensor)\n# Look up the embedding\ncurrent_embedding = char_embedding(current_tensor)\nprint(\"Current embedding:\", current_embedding)\n# Initialize the hidden state\nhidden = rnn.init_hidden()\n#print(hidden)\n# Pass the embedding and hidden state through the RNN\noutput, hidden = rnn(current_embedding, hidden)\nprint(output)\n\n# Print the predicted character (most probable)\n_, predicted_index = output.topk(1)\n# flatten the tensor\npredicted_index = predicted_index.squeeze().item()\n# convert to character\npredicted_char = ct.decode([predicted_index])\nprint(\"Predicted character:\", predicted_char)\n\nCurrent character: a\nCurent tensor: tensor([2])\nCurrent embedding: tensor([[-1.4545, 0.9880]], grad_fn=<EmbeddingBackward0>)\ntensor([[-2.5902, -3.3533, -3.8653, -3.9548, -3.5940, -2.8801, -3.4821, -3.0470,\n -3.5943, -3.5595, -3.6062, -3.5047, -3.6877, -3.3012, -3.7079, -4.4289,\n -2.9308, -3.6200, -3.3797, -3.7172, -2.8883, -2.6247, -3.7265, -3.3239,\n -3.7247, -2.9247, -3.4027, -3.2497]], grad_fn=<LogSoftmaxBackward0>)\nPredicted character: $\n\n\n\n# Create a function to generate a word (sequence of characters) given a \n# starting sequence of characters (stops when END token is predicted) \n# or if the length of the generated word exceeds a certain limit of 10 characters\ndef create_name(start_string, rnn, char_embedding, ct):\n with torch.no_grad():\n # start with the last character in the start_string\n current_char = start_string[-1]\n current_tensor = ct.encode(current_char)\n current_embedding = char_embedding(current_tensor)\n hidden = rnn.init_hidden()\n name = start_string\n while current_char != end_symbol and len(name) < 10:\n output, hidden = rnn(current_embedding, hidden)\n # Find the next character by sampling from the output distribution\n predicted_index = torch.multinomial(torch.exp(output), 1).item()\n current_char = ct.decode([predicted_index])\n _, predicted_index = output.topk(1)\n predicted_index = predicted_index.squeeze().item()\n current_char = ct.decode([predicted_index])\n name += current_char\n current_tensor = ct.encode(current_char)\n current_embedding = char_embedding(current_tensor)\n return name\n \n\n\ncreate_name('^a', rnn, char_embedding, ct)\n\n'^anm$'\n\n\n\ncreate_name('^c', rnn, char_embedding, ct)\n\n'^c$'\n\n\n\n# Generate dataset for training\ndef generate_data(names, ct):\n X = []\n Y = []\n for name in names:\n for i in range(1, len(name)):\n X.append(name[i-1])\n Y.append(name[i])\n X = [ct.encode(x) for x in X]\n Y = [ct.encode(y) for y in Y]\n return X, Y\n\nX, Y = generate_data(names, ct)\n\n\nX[0], Y[0], X[1], Y[1], X[2], Y[2]\n\n(tensor([1]), tensor([2]))\n\n\n\nprint(names[0])\nprint(ct.decode(X[0]), ct.decode(Y[0]))\nprint(ct.decode(X[1]), ct.decode(Y[1]))\nprint(ct.decode(X[2]), ct.decode(Y[2]))\n\n^abhishek$\n^ a\na b\nb h\n\n\n\n# Training loop\nnum_epochs = 12\nlearning_rate = 3e-4\nembedding_size = 8\nhidden_size = 32\nrnn = RNN(embedding_size, hidden_size, ct.vocab_size)\nembedding = CharEmbedding(ct.vocab_size, embedding_size)\n\noptimizer = torch.optim.Adam(list(rnn.parameters()) + list(embedding.parameters()), lr=learning_rate)\n\ncriterion = nn.NLLLoss()\n\nfor epoch in range(num_epochs):\n total_loss = 0\n for i in range(len(X)):\n optimizer.zero_grad()\n hidden = rnn.init_hidden()\n input_tensor = X[i]\n target_tensor = Y[i].squeeze()\n input_embedding = embedding(input_tensor)\n target_tensor = target_tensor.unsqueeze(0)\n output, hidden = rnn(input_embedding, hidden)\n \n predicted_next_char = output.argmax().item()\n \n loss = criterion(output, target_tensor)\n loss.backward()\n optimizer.step()\n total_loss += loss.item()\n \n #print(i, loss.item())\n if (epoch+1) % 1 == 0:\n print(f'Epoch: {epoch+1}/{num_epochs}, Loss: {total_loss/len(X)}')\n\nEpoch: 1/12, Loss: 2.684675631081001\nEpoch: 2/12, Loss: 2.4274482760898484\nEpoch: 3/12, Loss: 2.3604175581492934\nEpoch: 4/12, Loss: 2.3314669918697972\nEpoch: 5/12, Loss: 2.3155676853116023\nEpoch: 6/12, Loss: 2.3054449003057\nEpoch: 7/12, Loss: 2.2983417296262845\nEpoch: 8/12, Loss: 2.2929774504282614\nEpoch: 9/12, Loss: 2.2887099773854604\nEpoch: 10/12, Loss: 2.2851798680263626\nEpoch: 11/12, Loss: 2.2821793051528485\nEpoch: 12/12, Loss: 2.2795761335450453\n\n\n\nplot_2d_embeddings(embedding, vocab)\n\n\n\n\n\n\n\n\n\ncreate_name('^a', rnn, embedding, ct)\n\n'^an$'\n\n\n\ncreate_name('^b', rnn, embedding, ct)\n\n'^bhan$'\n\n\n\ncreate_name('^c', rnn, embedding, ct)\n\n'^chan$'\n\n\n\ncreate_name('^d', rnn, embedding, ct)\n\n'^dan$'\n\n\n\ncreate_name('^n', rnn, embedding, ct)\n\n'^n$'" + "text": "import numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format = 'retina'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom einops import rearrange, reduce, repeat\n\n\n!wget https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt -O names-indian.txt\n\n--2024-05-30 09:41:48-- https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...\nConnecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 8752 (8.5K) [text/plain]\nSaving to: ‘names-indian.txt’\n\nnames-indian.txt 100%[===================>] 8.55K --.-KB/s in 0s \n\n2024-05-30 09:41:49 (33.8 MB/s) - ‘names-indian.txt’ saved [8752/8752]\n\n\n\n\nimport pandas as pd\npd.read_csv('names-indian.txt', header=None)\n\n\n\n\n\n\n\n\n0\n\n\n\n\n0\nAbhishek\n\n\n1\nAman\n\n\n2\nHarsh\n\n\n3\nAyush\n\n\n4\nAditi\n\n\n...\n...\n\n\n1160\nPrasoon\n\n\n1161\nMadhusudan\n\n\n1162\nPrastuti\n\n\n1163\nRampratap\n\n\n1164\nMadhukar\n\n\n\n\n1165 rows × 1 columns\n\n\n\n\n# convert all names to lowercase\nnames = pd.read_csv('names-indian.txt', header=None)[0].str.lower().values\n\n\nnames\n\narray(['abhishek', 'aman', 'harsh', ..., 'prastuti', 'rampratap',\n 'madhukar'], dtype=object)\n\n\n\n# KDE plot of name lengths\nplt.figure(figsize=(8, 4))\nplt.hist([len(name) for name in names], bins=range(1, 20), density=True, alpha=0.7)\nplt.xlabel('Name length')\nplt.ylabel('Density')\n\nText(0, 0.5, 'Density')\n\n\n\n\n\n\n\n\n\n\n# Attach START and END tokens to each name. Need to add these two to the vocabulary.\nstart_symbol = '^'\nend_symbol = '$'\n\nnames = [start_symbol + name + end_symbol for name in names]\nnames[:5]\n\n['^abhishek$', '^aman$', '^harsh$', '^ayush$', '^aditi$']\n\n\n\n# Find unique characters in the dataset\nvocab = set(''.join(names))\nvocab = sorted(vocab)\nprint(vocab, len(vocab))\n\n['$', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 28\n\n\n\n# Create a d dimensional lookup table for each character in the vocabulary\nclass CharTable:\n def __init__(self, vocab):\n self.vocab = vocab\n self.char2index = {c: i for i, c in enumerate(vocab)}\n self.index2char = {i: c for i, c in enumerate(vocab)}\n self.vocab_size = len(vocab)\n \n def encode(self, name):\n return torch.tensor([self.char2index[c] for c in name])\n \n def decode(self, tensor):\n if type(tensor) == torch.Tensor:\n tensor = tensor.cpu().numpy()\n return ''.join([self.index2char[i] for i in tensor])\n\n\nct = CharTable(vocab)\n\n\nct.encode('^'), ct.encode('$'), ct.encode('a'), ct.encode('z'), ct.encode('ab'), ct.encode('za')\n\n(tensor([1]),\n tensor([0]),\n tensor([2]),\n tensor([27]),\n tensor([2, 3]),\n tensor([27, 2]))\n\n\n\nct.decode([1]), ct.decode(torch.tensor([1])), ct.decode(torch.tensor([1, 2, 3]))\n\n('^', '^', '^ab')\n\n\n\n# create embedding layer\nclass CharEmbedding(nn.Module):\n def __init__(self, vocab_size, embed_size):\n super(CharEmbedding, self).__init__()\n self.embedding = nn.Embedding(vocab_size, embed_size)\n \n def forward(self, x):\n return self.embedding(x)\n\n\nchar_embedding = CharEmbedding(ct.vocab_size, 2)\n\n\ndef plot_2d_embeddings(embedding, vocab):\n plt.figure(figsize=(4, 4))\n for i, char in enumerate(vocab):\n tensor = ct.encode(char)\n embedding = char_embedding(tensor)\n plt.scatter(embedding[0, 0].item(), embedding[0, 1].item())\n plt.text(embedding[0, 0].item(), embedding[0, 1].item(), char)\n plt.xlabel('Dimension 1')\n plt.ylabel('Dimension 2')\n\nplot_2d_embeddings(char_embedding, vocab)\n\n\n\n\n\n\n\n\n\nimport torch.nn.functional as F\n\nclass RNN(nn.Module):\n def __init__(self, input_size, hidden_size, output_size):\n super(RNN, self).__init__()\n\n self.hidden_size = hidden_size\n\n self.i2h = nn.Linear(input_size, hidden_size)\n self.h2h = nn.Linear(hidden_size, hidden_size)\n self.h2o = nn.Linear(hidden_size, output_size)\n self.softmax = nn.LogSoftmax(dim=1)\n\n def forward(self, input, hidden):\n hidden = F.tanh(self.i2h(input) + self.h2h(hidden))\n output = self.h2o(hidden)\n output = self.softmax(output)\n return output, hidden\n\n def init_hidden(self):\n return torch.zeros(1, self.hidden_size)\n\n\nrnn = RNN(2, 128, ct.vocab_size)\n\n\n# Predict the next character given the current character\ncurrent_char = \"a\"\nprint(\"Current character:\", current_char)\n# convert to tensor\ncurrent_tensor = ct.encode(current_char)\nprint(\"Curent tensor:\", current_tensor)\n# Look up the embedding\ncurrent_embedding = char_embedding(current_tensor)\nprint(\"Current embedding:\", current_embedding)\n# Initialize the hidden state\nhidden = rnn.init_hidden()\n#print(hidden)\n# Pass the embedding and hidden state through the RNN\noutput, hidden = rnn(current_embedding, hidden)\nprint(output)\n\n# Print the predicted character (most probable)\n_, predicted_index = output.topk(1)\n# flatten the tensor\npredicted_index = predicted_index.squeeze().item()\n# convert to character\npredicted_char = ct.decode([predicted_index])\nprint(\"Predicted character:\", predicted_char)\n\nCurrent character: a\nCurent tensor: tensor([2])\nCurrent embedding: tensor([[-1.4545, 0.9880]], grad_fn=<EmbeddingBackward0>)\ntensor([[-2.5902, -3.3533, -3.8653, -3.9548, -3.5940, -2.8801, -3.4821, -3.0470,\n -3.5943, -3.5595, -3.6062, -3.5047, -3.6877, -3.3012, -3.7079, -4.4289,\n -2.9308, -3.6200, -3.3797, -3.7172, -2.8883, -2.6247, -3.7265, -3.3239,\n -3.7247, -2.9247, -3.4027, -3.2497]], grad_fn=<LogSoftmaxBackward0>)\nPredicted character: $\n\n\n\n# Create a function to generate a word (sequence of characters) given a \n# starting sequence of characters (stops when END token is predicted) \n# or if the length of the generated word exceeds a certain limit of 10 characters\ndef create_name(start_string, rnn, char_embedding, ct):\n with torch.no_grad():\n # start with the last character in the start_string\n current_char = start_string[-1]\n current_tensor = ct.encode(current_char)\n current_embedding = char_embedding(current_tensor)\n hidden = rnn.init_hidden()\n name = start_string\n while current_char != end_symbol and len(name) < 10:\n output, hidden = rnn(current_embedding, hidden)\n # Find the next character by sampling from the output distribution\n predicted_index = torch.multinomial(torch.exp(output), 1).item()\n current_char = ct.decode([predicted_index])\n _, predicted_index = output.topk(1)\n predicted_index = predicted_index.squeeze().item()\n current_char = ct.decode([predicted_index])\n name += current_char\n current_tensor = ct.encode(current_char)\n current_embedding = char_embedding(current_tensor)\n return name\n \n\n\ncreate_name('^a', rnn, char_embedding, ct)\n\n'^anm$'\n\n\n\ncreate_name('^c', rnn, char_embedding, ct)\n\n'^c$'\n\n\n\n# Generate dataset for training\ndef generate_data(names, ct):\n X = []\n Y = []\n for name in names:\n for i in range(1, len(name)):\n X.append(name[i-1])\n Y.append(name[i])\n X = [ct.encode(x) for x in X]\n Y = [ct.encode(y) for y in Y]\n return X, Y\n\nX, Y = generate_data(names, ct)\n\n\nX[0], Y[0], X[1], Y[1], X[2], Y[2]\n\n(tensor([1]), tensor([2]))\n\n\n\nprint(names[0])\nprint(ct.decode(X[0]), ct.decode(Y[0]))\nprint(ct.decode(X[1]), ct.decode(Y[1]))\nprint(ct.decode(X[2]), ct.decode(Y[2]))\n\n^abhishek$\n^ a\na b\nb h\n\n\n\n# Training loop\nnum_epochs = 12\nlearning_rate = 3e-4\nembedding_size = 8\nhidden_size = 32\nrnn = RNN(embedding_size, hidden_size, ct.vocab_size)\nembedding = CharEmbedding(ct.vocab_size, embedding_size)\n\noptimizer = torch.optim.Adam(list(rnn.parameters()) + list(embedding.parameters()), lr=learning_rate)\n\ncriterion = nn.NLLLoss()\n\nfor epoch in range(num_epochs):\n total_loss = 0\n for i in range(len(X)):\n optimizer.zero_grad()\n hidden = rnn.init_hidden()\n input_tensor = X[i]\n target_tensor = Y[i].squeeze()\n input_embedding = embedding(input_tensor)\n target_tensor = target_tensor.unsqueeze(0)\n output, hidden = rnn(input_embedding, hidden)\n \n predicted_next_char = output.argmax().item()\n \n loss = criterion(output, target_tensor)\n loss.backward()\n optimizer.step()\n total_loss += loss.item()\n \n #print(i, loss.item())\n if (epoch+1) % 1 == 0:\n print(f'Epoch: {epoch+1}/{num_epochs}, Loss: {total_loss/len(X)}')\n\nEpoch: 1/12, Loss: 2.684675631081001\nEpoch: 2/12, Loss: 2.4274482760898484\nEpoch: 3/12, Loss: 2.3604175581492934\nEpoch: 4/12, Loss: 2.3314669918697972\nEpoch: 5/12, Loss: 2.3155676853116023\nEpoch: 6/12, Loss: 2.3054449003057\nEpoch: 7/12, Loss: 2.2983417296262845\nEpoch: 8/12, Loss: 2.2929774504282614\nEpoch: 9/12, Loss: 2.2887099773854604\nEpoch: 10/12, Loss: 2.2851798680263626\nEpoch: 11/12, Loss: 2.2821793051528485\nEpoch: 12/12, Loss: 2.2795761335450453\n\n\n\nplot_2d_embeddings(embedding, vocab)\n\n\n\n\n\n\n\n\n\ncreate_name('^a', rnn, embedding, ct)\n\n'^an$'\n\n\n\ncreate_name('^b', rnn, embedding, ct)\n\n'^bhan$'\n\n\n\ncreate_name('^c', rnn, embedding, ct)\n\n'^chan$'\n\n\n\ncreate_name('^d', rnn, embedding, ct)\n\n'^dan$'\n\n\n\ncreate_name('^n', rnn, embedding, ct)\n\n'^n$'" }, { "objectID": "posts/siren-paper.html", @@ -207,7 +207,7 @@ "href": "posts/2022-10-25-mogp.html", "title": "Multi-output Gaussian Process", "section": "", - "text": "In this notebook, we cover multi-output GPs. The presentation follows the excellent video from GPSS\n\nimport numpy as np\nfrom scipy.stats import multivariate_normal\n%matplotlib inline\n%config InlineBackend.figure_format = 'svg'\nimport matplotlib\n#matplotlib.rcParams['figure.figsize'] = (8,6)\nfrom matplotlib import pyplot as plt\nimport GPy\n\n\nICM\n\\(u \\sim GP (0, k)\\)\nsample from u to get a sample \\(u^1\\)\n\\(f_1(x) = a^1_1 u^1(x)\\)\n\\(f_2(x) = a^1_2 u^1(x)\\)\n\nX = np.linspace(-3.,3., 50)\n\n\nkernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=2.)\n\n\nkernel\n\n\n\n\n\n\n\nrbf.\nvalue\nconstraints\npriors\n\n\nvariance\n1.0\n+ve\n\n\n\nlengthscale\n2.0\n+ve\n\n\n\n\n\n\n\n\ndef jitter(C, j = 1e-6):\n return C + np.eye(len(C))*j\n\n\ncov = jitter(kernel.K(X.reshape(-1, 1)))\n\n\nplt.imshow(cov)\n\n\n\n\n\n\n\n\n\nmvn = multivariate_normal(cov=cov)\n\n\nu1 = mvn.rvs(random_state=0)\nplt.plot(X, u1)\n\n\n\n\n\n\n\n\n\na11 = 0.9\na12 = 0.7\n\na = np.array([a11, a12]).reshape(-1, 1)\n\n\na\n\narray([[0.9],\n [0.7]])\n\n\n\nB = a@a.T\nB\n\narray([[0.81, 0.63],\n [0.63, 0.49]])\n\n\n\ncov_f = np.kron(B, cov)\n\n\nplt.imshow(cov_f, cmap='Purples')\nplt.colorbar()\n\n\n\n\n\n\n\n\n\nf_sample = multivariate_normal(cov=jitter(cov_f)).rvs(size=500)\nf1_samples, f2_samples = f_sample[:, :50], f_sample[:, 50:]\n\n\n#plt.plot(X, u1, label=\"u1\")\nfor i in range(2):\n plt.plot(X, f1_samples[i], color='g') \n plt.plot(X, f2_samples[i], color='r' )\n\n\n\n\n\n\n\n\n\nf1_samples[i]/f2_samples[i]\n\narray([1.28521323, 1.2870487 , 1.28169798, 1.29387391, 1.28381124,\n 1.29063798, 1.28399272, 1.28787108, 1.27634933, 1.29367057,\n 1.19405718, 0.81421541, 1.29366628, 1.23932848, 1.28601429,\n 1.31178054, 1.27596873, 1.28139033, 1.28548127, 1.28874727,\n 1.288544 , 1.28851575, 1.27706874, 1.28929381, 1.27167387,\n 1.30216154, 1.28769528, 1.28397652, 1.2896767 , 1.29357874,\n 1.28743778, 1.28867757, 1.29135504, 1.28085954, 1.27832016,\n 1.29113682, 1.28346876, 1.28115477, 1.28579679, 1.28664088,\n 1.2836771 , 1.28690568, 1.28521466, 1.28474094, 1.28147929,\n 1.28752966, 1.28577663, 1.28154063, 1.28312776, 1.2869964 ])\n\n\n\n## Learning in MOGP setting\n\nf1_dataset = f1_samples[4]\nf2_dataset = f2_samples[4]\n\n\nplt.plot(X, f1_dataset, label='f1')\nplt.plot(X, f2_dataset, label='f2')\nplt.legend()\n\n\n\n\n\n\n\n\n\n## What all we want to learn:\n\n# 1. GP kernel parameters\n# 2. a11, a12\n\n\nimport jax\nimport jax.numpy as jnp\nfrom jax.config import config\nconfig.update(\"jax_enable_x64\", True)\nimport tensorflow_probability.substrates.jax as tfp\n\n\nf = jnp.hstack([f1_dataset, f2_dataset])\n\n\ndef sqexp(a, b, var=1.0, ls=4):\n diff = (a-b)/ls\n d = jnp.sum(diff ** 2)\n return var*jnp.exp(-0.5 * d)\n\ndef all_pairs(f):\n f = jax.vmap(f, in_axes= (None, 0, None, None))\n f = jax. vmap (f, in_axes= (0, None, None, None))\n return f\n\n\nkernel.K(X.reshape(-1, 1))\n\narray([[1. , 0.99812754, 0.99253116, ..., 0.01592046, 0.01332383,\n 0.011109 ],\n [0.99812754, 1. , 0.99812754, ..., 0.01895197, 0.01592046,\n 0.01332383],\n [0.99253116, 0.99812754, 1. , ..., 0.02247631, 0.01895197,\n 0.01592046],\n ...,\n [0.01592046, 0.01895197, 0.02247631, ..., 1. , 0.99812754,\n 0.99253116],\n [0.01332383, 0.01592046, 0.01895197, ..., 0.99812754, 1. ,\n 0.99812754],\n [0.011109 , 0.01332383, 0.01592046, ..., 0.99253116, 0.99812754,\n 1. ]])\n\n\n\nnp.allclose(np.array(all_pairs(sqexp)(X, X, 1.0, 2.0)), kernel.K(X.reshape(-1, 1)))\n\nTrue\n\n\n\nrank = 1\noutput_dim = 2\nA = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/10.0\nA@A.T, A\n\n(DeviceArray([[ 0.03298171, -0.01370936],\n [-0.01370936, 0.00569851]], dtype=float64),\n DeviceArray([[ 0.18160867],\n [-0.07548848]], dtype=float64))\n\n\n\noutput_dim = 2\nrank = 4\nA = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/2.0\nA@A.T\n\nDeviceArray([[ 1.24957827, -0.04698574],\n [-0.04698574, 0.57577417]], dtype=float64)\n\n\n\ndef covariance_f(var, ls, A):\n \"\"\"\n A: (output_dim, rank) \n A can be generated as:\n A = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))\n \"\"\"\n B = A@A.T\n cov = all_pairs(sqexp)(X, X, var, ls)\n cov_f = jitter(jnp.kron(B, cov))\n return cov_f\n\n\ndef cost(var, ls, A):\n cov_f = covariance_f(var, ls, A)\n dist = tfp.distributions.MultivariateNormalFullCovariance(loc = jnp.zeros_like(f), covariance_matrix = cov_f)\n return -dist.log_prob(f)\n\n\nplt.imshow(covariance_f(1.0, 2.0, A), cmap='Purples')\nplt.colorbar()\n\n\n\n\n\n\n\n\n\ncost(1.0, 2.0, A)\n\nDeviceArray(-431.60947116, dtype=float64)\n\n\n\ncost(1.0, 1.0, A)\n\nDeviceArray(-387.35267033, dtype=float64)\n\n\n\ngrads = jax.grad(cost, argnums=[0, 1, 2])(0.1, 1.0, A)\n\nvar = 0.1\nls = 1.0\n\nlr = 1e-3\n\n\nfor i in range(500):\n grads = jax.grad(cost, argnums=[0, 1, 2])(var, ls, A)\n var = var-lr*grads[0]\n ls = ls-lr*grads[1]\n A = A-lr*grads[2]\n if i%100==0:\n print(i, cost(1.0, 1.0, A), var, ls)\n\n0 -387.06097276826193 0.500429427376359 1.0913929924306696\n100 -306.72979544101435 3.6414838350262055 2.363476650308803\n200 -305.64842462218047 3.514293617054404 2.3873529546968477\n300 -304.7976816183849 3.379382170959892 2.403204858135416\n400 -304.0941499412901 3.236859846397818 2.4140771572105426\n\n\n\nC_learnt = covariance_f(var, ls, A)\nplt.imshow(C_learnt, cmap='Purples')\nplt.colorbar()\n\n\n\n\n\n\n\n\n\ndist = tfp.distributions.MultivariateNormalFullCovariance(covariance_matrix=C_learnt)\nsamples_f1 = dist.sample(sample_shape=(10, ), seed = jax.random.PRNGKey(0))\nfor s in samples_f1:\n plt.plot(X, s[:50], color='k')\nplt.plot(X, f1_dataset)\n\n\n\n\n\n\n\n\n\n\nSLFM\n\ndef covariance_f_SLFM(var1, ls1, A1, var2, ls2, A2):\n \"\"\"\n\n \"\"\"\n B1 = A1@A1.T\n B2 = A2@A2.T\n cov1 = all_pairs(sqexp)(X, X, var1, ls1)\n cov2 = all_pairs(sqexp)(X, X, var1, ls1)\n cov_f = jitter(jnp.kron(B1, cov1) + jnp.kron(B2, cov2))\n return cov_f\n\n\nrank = 1\na1 = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/2.0\na2 = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/2.0\n\n\n\nC_SLFM = covariance_f_SLFM(1.0, 2.0, a1@a1.T, 1.0, 4.0, a2@a2.T)\n\n\nplt.imshow(C_SLFM, cmap='Purples')\nplt.colorbar()" + "text": "In this notebook, we cover multi-output GPs. The presentation follows the excellent video from GPSS\n\nimport numpy as np\nfrom scipy.stats import multivariate_normal\n%matplotlib inline\n%config InlineBackend.figure_format = 'svg'\nimport matplotlib\n#matplotlib.rcParams['figure.figsize'] = (8,6)\nfrom matplotlib import pyplot as plt\nimport GPy\n\n\nICM\n\\(u \\sim GP (0, k)\\)\nsample from u to get a sample \\(u^1\\)\n\\(f_1(x) = a^1_1 u^1(x)\\)\n\\(f_2(x) = a^1_2 u^1(x)\\)\n\nX = np.linspace(-3.,3., 50)\n\n\nkernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=2.)\n\n\nkernel\n\n\n\n\n\n\nrbf.\nvalue\nconstraints\npriors\n\n\nvariance\n1.0\n+ve\n\n\n\nlengthscale\n2.0\n+ve\n\n\n\n\n\n\n\ndef jitter(C, j = 1e-6):\n return C + np.eye(len(C))*j\n\n\ncov = jitter(kernel.K(X.reshape(-1, 1)))\n\n\nplt.imshow(cov)\n\n\n\n\n\n\n\n\n\nmvn = multivariate_normal(cov=cov)\n\n\nu1 = mvn.rvs(random_state=0)\nplt.plot(X, u1)\n\n\n\n\n\n\n\n\n\na11 = 0.9\na12 = 0.7\n\na = np.array([a11, a12]).reshape(-1, 1)\n\n\na\n\narray([[0.9],\n [0.7]])\n\n\n\nB = a@a.T\nB\n\narray([[0.81, 0.63],\n [0.63, 0.49]])\n\n\n\ncov_f = np.kron(B, cov)\n\n\nplt.imshow(cov_f, cmap='Purples')\nplt.colorbar()\n\n\n\n\n\n\n\n\n\nf_sample = multivariate_normal(cov=jitter(cov_f)).rvs(size=500)\nf1_samples, f2_samples = f_sample[:, :50], f_sample[:, 50:]\n\n\n#plt.plot(X, u1, label=\"u1\")\nfor i in range(2):\n plt.plot(X, f1_samples[i], color='g') \n plt.plot(X, f2_samples[i], color='r' )\n\n\n\n\n\n\n\n\n\nf1_samples[i]/f2_samples[i]\n\narray([1.28521323, 1.2870487 , 1.28169798, 1.29387391, 1.28381124,\n 1.29063798, 1.28399272, 1.28787108, 1.27634933, 1.29367057,\n 1.19405718, 0.81421541, 1.29366628, 1.23932848, 1.28601429,\n 1.31178054, 1.27596873, 1.28139033, 1.28548127, 1.28874727,\n 1.288544 , 1.28851575, 1.27706874, 1.28929381, 1.27167387,\n 1.30216154, 1.28769528, 1.28397652, 1.2896767 , 1.29357874,\n 1.28743778, 1.28867757, 1.29135504, 1.28085954, 1.27832016,\n 1.29113682, 1.28346876, 1.28115477, 1.28579679, 1.28664088,\n 1.2836771 , 1.28690568, 1.28521466, 1.28474094, 1.28147929,\n 1.28752966, 1.28577663, 1.28154063, 1.28312776, 1.2869964 ])\n\n\n\n## Learning in MOGP setting\n\nf1_dataset = f1_samples[4]\nf2_dataset = f2_samples[4]\n\n\nplt.plot(X, f1_dataset, label='f1')\nplt.plot(X, f2_dataset, label='f2')\nplt.legend()\n\n\n\n\n\n\n\n\n\n## What all we want to learn:\n\n# 1. GP kernel parameters\n# 2. a11, a12\n\n\nimport jax\nimport jax.numpy as jnp\nfrom jax.config import config\nconfig.update(\"jax_enable_x64\", True)\nimport tensorflow_probability.substrates.jax as tfp\n\n\nf = jnp.hstack([f1_dataset, f2_dataset])\n\n\ndef sqexp(a, b, var=1.0, ls=4):\n diff = (a-b)/ls\n d = jnp.sum(diff ** 2)\n return var*jnp.exp(-0.5 * d)\n\ndef all_pairs(f):\n f = jax.vmap(f, in_axes= (None, 0, None, None))\n f = jax. vmap (f, in_axes= (0, None, None, None))\n return f\n\n\nkernel.K(X.reshape(-1, 1))\n\narray([[1. , 0.99812754, 0.99253116, ..., 0.01592046, 0.01332383,\n 0.011109 ],\n [0.99812754, 1. , 0.99812754, ..., 0.01895197, 0.01592046,\n 0.01332383],\n [0.99253116, 0.99812754, 1. , ..., 0.02247631, 0.01895197,\n 0.01592046],\n ...,\n [0.01592046, 0.01895197, 0.02247631, ..., 1. , 0.99812754,\n 0.99253116],\n [0.01332383, 0.01592046, 0.01895197, ..., 0.99812754, 1. ,\n 0.99812754],\n [0.011109 , 0.01332383, 0.01592046, ..., 0.99253116, 0.99812754,\n 1. ]])\n\n\n\nnp.allclose(np.array(all_pairs(sqexp)(X, X, 1.0, 2.0)), kernel.K(X.reshape(-1, 1)))\n\nTrue\n\n\n\nrank = 1\noutput_dim = 2\nA = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/10.0\nA@A.T, A\n\n(DeviceArray([[ 0.03298171, -0.01370936],\n [-0.01370936, 0.00569851]], dtype=float64),\n DeviceArray([[ 0.18160867],\n [-0.07548848]], dtype=float64))\n\n\n\noutput_dim = 2\nrank = 4\nA = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/2.0\nA@A.T\n\nDeviceArray([[ 1.24957827, -0.04698574],\n [-0.04698574, 0.57577417]], dtype=float64)\n\n\n\ndef covariance_f(var, ls, A):\n \"\"\"\n A: (output_dim, rank) \n A can be generated as:\n A = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))\n \"\"\"\n B = A@A.T\n cov = all_pairs(sqexp)(X, X, var, ls)\n cov_f = jitter(jnp.kron(B, cov))\n return cov_f\n\n\ndef cost(var, ls, A):\n cov_f = covariance_f(var, ls, A)\n dist = tfp.distributions.MultivariateNormalFullCovariance(loc = jnp.zeros_like(f), covariance_matrix = cov_f)\n return -dist.log_prob(f)\n\n\nplt.imshow(covariance_f(1.0, 2.0, A), cmap='Purples')\nplt.colorbar()\n\n\n\n\n\n\n\n\n\ncost(1.0, 2.0, A)\n\nDeviceArray(-431.60947116, dtype=float64)\n\n\n\ncost(1.0, 1.0, A)\n\nDeviceArray(-387.35267033, dtype=float64)\n\n\n\ngrads = jax.grad(cost, argnums=[0, 1, 2])(0.1, 1.0, A)\n\nvar = 0.1\nls = 1.0\n\nlr = 1e-3\n\n\nfor i in range(500):\n grads = jax.grad(cost, argnums=[0, 1, 2])(var, ls, A)\n var = var-lr*grads[0]\n ls = ls-lr*grads[1]\n A = A-lr*grads[2]\n if i%100==0:\n print(i, cost(1.0, 1.0, A), var, ls)\n\n0 -387.06097276826193 0.500429427376359 1.0913929924306696\n100 -306.72979544101435 3.6414838350262055 2.363476650308803\n200 -305.64842462218047 3.514293617054404 2.3873529546968477\n300 -304.7976816183849 3.379382170959892 2.403204858135416\n400 -304.0941499412901 3.236859846397818 2.4140771572105426\n\n\n\nC_learnt = covariance_f(var, ls, A)\nplt.imshow(C_learnt, cmap='Purples')\nplt.colorbar()\n\n\n\n\n\n\n\n\n\ndist = tfp.distributions.MultivariateNormalFullCovariance(covariance_matrix=C_learnt)\nsamples_f1 = dist.sample(sample_shape=(10, ), seed = jax.random.PRNGKey(0))\nfor s in samples_f1:\n plt.plot(X, s[:50], color='k')\nplt.plot(X, f1_dataset)\n\n\n\n\n\n\n\n\n\n\nSLFM\n\ndef covariance_f_SLFM(var1, ls1, A1, var2, ls2, A2):\n \"\"\"\n\n \"\"\"\n B1 = A1@A1.T\n B2 = A2@A2.T\n cov1 = all_pairs(sqexp)(X, X, var1, ls1)\n cov2 = all_pairs(sqexp)(X, X, var1, ls1)\n cov_f = jitter(jnp.kron(B1, cov1) + jnp.kron(B2, cov2))\n return cov_f\n\n\nrank = 1\na1 = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/2.0\na2 = jax.random.normal(key=jax.random.PRNGKey(0), shape=(output_dim,rank))/2.0\n\n\n\nC_SLFM = covariance_f_SLFM(1.0, 2.0, a1@a1.T, 1.0, 4.0, a2@a2.T)\n\n\nplt.imshow(C_SLFM, cmap='Purples')\nplt.colorbar()" }, { "objectID": "posts/2017-06-15-linear-regression-prior.html", @@ -242,7 +242,7 @@ "href": "posts/2018-06-26-map-electricity-access.html", "title": "Visualising Electricity Access Over Space and Time", "section": "", - "text": "In this post, I’ll explore electricity access, i.e. globally what fraction of people have access to electricity. Beyond the goal of finding the electricity access, this post will also serve to illustrate how the coolness coefficient of the Python visualisation ecosystem!\nI’ll be using data from World Bank for electricity access. See the image below for the corresponding page.\n\n\nDownloading World Bank data\nNow, a Python package called wbdata provides a fairly easy way to access World Bank data. I’d be using it to get data in Pandas DataFrame.\n\n%matplotlib inline\nimport pandas as pd\nimport wbdata\nimport matplotlib.pyplot as plt\nimport datetime\ndata_date = (datetime.datetime(1990, 1, 1), datetime.datetime(2016, 1, 1))\ndf_elec = wbdata.get_data(\"EG.ELC.ACCS.ZS\", pandas=True, data_date=data_date)\n\n\ndf_elec.head()\n\ncountry date\nArab World 2016 88.768654\n 2015 88.517967\n 2014 88.076774\n 2013 88.389705\n 2012 87.288244\nName: value, dtype: float64\n\n\n\n\nDownloading Geodata and Reading Using GeoPandas\nI’d now be downloading shapefile data for different countries. This will help us to spatially plot the data for the different countries.\n\n!wget http://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_lakes.zip\n\n--2018-06-26 15:52:50-- http://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_lakes.zip\nResolving naciscdn.org (naciscdn.org)... 146.201.97.163\nConnecting to naciscdn.org (naciscdn.org)|146.201.97.163|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 5077755 (4.8M) [application/x-zip-compressed]\nSaving to: ‘ne_10m_admin_0_countries_lakes.zip’\n\nne_10m_admin_0_coun 100%[===================>] 4.84M 246KB/s in 22s \n\n2018-06-26 15:53:12 (228 KB/s) - ‘ne_10m_admin_0_countries_lakes.zip’ saved [5077755/5077755]\n\n\n\n\nExtracting shapefile\n\nimport zipfile\nzip_ref = zipfile.ZipFile('ne_10m_admin_0_countries_lakes.zip', 'r')\nzip_ref.extractall('.')\nzip_ref.close()\n\n\nimport geopandas as gpd\ngdf = gpd.read_file('ne_10m_admin_0_countries_lakes.shp')[['ADM0_A3', 'geometry']]\n\n\ngdf.head()\n\n\n\n\n\n\n\n\n\nADM0_A3\ngeometry\n\n\n\n\n0\nIDN\n(POLYGON ((117.7036079039552 4.163414542001791...\n\n\n1\nMYS\n(POLYGON ((117.7036079039552 4.163414542001791...\n\n\n2\nCHL\n(POLYGON ((-69.51008875199994 -17.506588197999...\n\n\n3\nBOL\nPOLYGON ((-69.51008875199994 -17.5065881979999...\n\n\n4\nPER\n(POLYGON ((-69.51008875199994 -17.506588197999...\n\n\n\n\n\n\n\n\n\n\n\nVisualising electricity access in 2016\n\nGetting electricity access data for 2016\n\ndf_2016 = df_elec.unstack()[['2016']].dropna()\n\n\ndf_2016.head()\n\n\n\n\n\n\n\n\ndate\n2016\n\n\ncountry\n\n\n\n\n\nAfghanistan\n84.137138\n\n\nAlbania\n100.000000\n\n\nAlgeria\n99.439568\n\n\nAndorra\n100.000000\n\n\nAngola\n40.520607\n\n\n\n\n\n\n\n\nIn order to visualise electricity access data over the map, we would have to join the GeoPandas object gdf and df_elec\n\n\nJoining gdf and df_2016\nNow, gdf uses alpha_3 codes for country names like AFG, etc., whereas df_2016 uses country names. We will thus use pycountry package to get code names corresponding to countries in df_2016 as shown in this StackOverflow post.\n\nimport pycountry\ncountries = {}\nfor country in pycountry.countries:\n countries[country.name] = country.alpha_3\ncodes = [countries.get(country, 'Unknown code') for country in df_2016.index]\ndf_2016['Code'] = codes\n\n\ndf_2016.head()\n\n\n\n\n\n\n\n\ndate\n2016\nCode\n\n\ncountry\n\n\n\n\n\n\nAfghanistan\n84.137138\nAFG\n\n\nAlbania\n100.000000\nALB\n\n\nAlgeria\n99.439568\nDZA\n\n\nAndorra\n100.000000\nAND\n\n\nAngola\n40.520607\nAGO\n\n\n\n\n\n\n\n\nNow, we can join the two data sources\n\nmerged_df_2016 = gpd.GeoDataFrame(pd.merge(gdf, df_2016, left_on='ADM0_A3', right_on='Code'))\n\n\nmerged_df_2016.head()\n\n\n\n\n\n\n\n\n\nADM0_A3\ngeometry\n2016\nCode\n\n\n\n\n0\nIDN\n(POLYGON ((117.7036079039552 4.163414542001791...\n97.620000\nIDN\n\n\n1\nMYS\n(POLYGON ((117.7036079039552 4.163414542001791...\n100.000000\nMYS\n\n\n2\nCHL\n(POLYGON ((-69.51008875199994 -17.506588197999...\n100.000000\nCHL\n\n\n3\nPER\n(POLYGON ((-69.51008875199994 -17.506588197999...\n94.851746\nPER\n\n\n4\nARG\n(POLYGON ((-68.4486097329999 -52.3466170159999...\n100.000000\nARG\n\n\n\n\n\n\n\n\n\n\nFinally plotting!\n\n# Example borrowed from http://ramiro.org/notebook/geopandas-choropleth/\ncmap='OrRd'\nfigsize = (16, 5)\nax = merged_df_2016.plot(column='2016', cmap=cmap, figsize=figsize,legend=True)\ntitle = 'Electricity Access(% of population) in {}'.format('2016')\ngdf[~gdf.ADM0_A3.isin(merged_df_2016.ADM0_A3)].plot(ax=ax, color='#fffafa', hatch='///')\nax.set_title(title, fontdict={'fontsize': 15}, loc='left')\nax.set_axis_off()\n\n\n\n\n\n\n\n\n\n\n\nCreating animation for access across time\n\n!mkdir -p elec_access\n\n\ndef save_png_year(year, path=\"elec_access\"):\n df_year = df_elec.unstack()[['{}'.format(year)]].dropna()\n codes = [countries.get(country, 'Unknown code') for country in df_year.index]\n df_year['Code'] = codes\n merged_df_year = gpd.GeoDataFrame(pd.merge(gdf, df_year, left_on='ADM0_A3', right_on='Code'))\n figsize = (16, 5)\n ax = merged_df_year.plot(column='{}'.format(year), cmap=cmap, figsize=figsize,legend=True,vmin=0.0, vmax=100.0)\n title = 'Electricity Access(% of population) in {}'.format(year)\n gdf[~gdf.ADM0_A3.isin(merged_df_year.ADM0_A3)].plot(ax=ax, color='#fffafa', hatch='///')\n ax.set_title(title, fontdict={'fontsize': 15}, loc='left')\n ax.set_axis_off()\n plt.savefig('{}/{}.png'.format(path, year), dpi=300)\n plt.close()\n\n\nfor year in range(1990, 2017):\n save_png_year(year)\n\n\n# Borrowed from http://www.kevinwampler.com/blog/2016/09/10/creating-animated-gifs-using-python.html\ndef create_gifv(input_files, output_base_name, fps):\n import imageio\n output_extensions = [\"gif\"]\n input_filenames = ['elec_access/{}.png'.format(year) for year in range(1990, 2017)]\n\n poster_writer = imageio.get_writer(\"{}.png\".format(output_base_name), mode='i')\n video_writers = [\n imageio.get_writer(\"{}.{}\".format(output_base_name, ext), mode='I', fps=fps)\n for ext in output_extensions]\n\n is_first = True\n for filename in input_filenames:\n img = imageio.imread(filename)\n\n for writer in video_writers:\n writer.append_data(img)\n if is_first:\n poster_writer.append_data(img)\n\n is_first = False\n\n for writer in video_writers + [poster_writer]:\n writer.close()\n\n\ncreate_gifv(\"elec_access/*.png\", \"electricity_access\", 4)\n\n\nAcross Africa and SE Asia, one can clearly see a gradual improvement in access! Hope you had fun reading this post :)" + "text": "In this post, I’ll explore electricity access, i.e. globally what fraction of people have access to electricity. Beyond the goal of finding the electricity access, this post will also serve to illustrate how the coolness coefficient of the Python visualisation ecosystem!\nI’ll be using data from World Bank for electricity access. See the image below for the corresponding page.\n\n\nDownloading World Bank data\nNow, a Python package called wbdata provides a fairly easy way to access World Bank data. I’d be using it to get data in Pandas DataFrame.\n\n%matplotlib inline\nimport pandas as pd\nimport wbdata\nimport matplotlib.pyplot as plt\nimport datetime\ndata_date = (datetime.datetime(1990, 1, 1), datetime.datetime(2016, 1, 1))\ndf_elec = wbdata.get_data(\"EG.ELC.ACCS.ZS\", pandas=True, data_date=data_date)\n\n\ndf_elec.head()\n\ncountry date\nArab World 2016 88.768654\n 2015 88.517967\n 2014 88.076774\n 2013 88.389705\n 2012 87.288244\nName: value, dtype: float64\n\n\n\n\nDownloading Geodata and Reading Using GeoPandas\nI’d now be downloading shapefile data for different countries. This will help us to spatially plot the data for the different countries.\n\n!wget http://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_lakes.zip\n\n--2018-06-26 15:52:50-- http://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_lakes.zip\nResolving naciscdn.org (naciscdn.org)... 146.201.97.163\nConnecting to naciscdn.org (naciscdn.org)|146.201.97.163|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 5077755 (4.8M) [application/x-zip-compressed]\nSaving to: ‘ne_10m_admin_0_countries_lakes.zip’\n\nne_10m_admin_0_coun 100%[===================>] 4.84M 246KB/s in 22s \n\n2018-06-26 15:53:12 (228 KB/s) - ‘ne_10m_admin_0_countries_lakes.zip’ saved [5077755/5077755]\n\n\n\n\nExtracting shapefile\n\nimport zipfile\nzip_ref = zipfile.ZipFile('ne_10m_admin_0_countries_lakes.zip', 'r')\nzip_ref.extractall('.')\nzip_ref.close()\n\n\nimport geopandas as gpd\ngdf = gpd.read_file('ne_10m_admin_0_countries_lakes.shp')[['ADM0_A3', 'geometry']]\n\n\ngdf.head()\n\n\n\n\n\n\n\n\nADM0_A3\ngeometry\n\n\n\n\n0\nIDN\n(POLYGON ((117.7036079039552 4.163414542001791...\n\n\n1\nMYS\n(POLYGON ((117.7036079039552 4.163414542001791...\n\n\n2\nCHL\n(POLYGON ((-69.51008875199994 -17.506588197999...\n\n\n3\nBOL\nPOLYGON ((-69.51008875199994 -17.5065881979999...\n\n\n4\nPER\n(POLYGON ((-69.51008875199994 -17.506588197999...\n\n\n\n\n\n\n\n\n\n\nVisualising electricity access in 2016\n\nGetting electricity access data for 2016\n\ndf_2016 = df_elec.unstack()[['2016']].dropna()\n\n\ndf_2016.head()\n\n\n\n\n\n\n\ndate\n2016\n\n\ncountry\n\n\n\n\n\nAfghanistan\n84.137138\n\n\nAlbania\n100.000000\n\n\nAlgeria\n99.439568\n\n\nAndorra\n100.000000\n\n\nAngola\n40.520607\n\n\n\n\n\n\n\nIn order to visualise electricity access data over the map, we would have to join the GeoPandas object gdf and df_elec\n\n\nJoining gdf and df_2016\nNow, gdf uses alpha_3 codes for country names like AFG, etc., whereas df_2016 uses country names. We will thus use pycountry package to get code names corresponding to countries in df_2016 as shown in this StackOverflow post.\n\nimport pycountry\ncountries = {}\nfor country in pycountry.countries:\n countries[country.name] = country.alpha_3\ncodes = [countries.get(country, 'Unknown code') for country in df_2016.index]\ndf_2016['Code'] = codes\n\n\ndf_2016.head()\n\n\n\n\n\n\n\ndate\n2016\nCode\n\n\ncountry\n\n\n\n\n\n\nAfghanistan\n84.137138\nAFG\n\n\nAlbania\n100.000000\nALB\n\n\nAlgeria\n99.439568\nDZA\n\n\nAndorra\n100.000000\nAND\n\n\nAngola\n40.520607\nAGO\n\n\n\n\n\n\n\nNow, we can join the two data sources\n\nmerged_df_2016 = gpd.GeoDataFrame(pd.merge(gdf, df_2016, left_on='ADM0_A3', right_on='Code'))\n\n\nmerged_df_2016.head()\n\n\n\n\n\n\n\n\nADM0_A3\ngeometry\n2016\nCode\n\n\n\n\n0\nIDN\n(POLYGON ((117.7036079039552 4.163414542001791...\n97.620000\nIDN\n\n\n1\nMYS\n(POLYGON ((117.7036079039552 4.163414542001791...\n100.000000\nMYS\n\n\n2\nCHL\n(POLYGON ((-69.51008875199994 -17.506588197999...\n100.000000\nCHL\n\n\n3\nPER\n(POLYGON ((-69.51008875199994 -17.506588197999...\n94.851746\nPER\n\n\n4\nARG\n(POLYGON ((-68.4486097329999 -52.3466170159999...\n100.000000\nARG\n\n\n\n\n\n\n\n\n\nFinally plotting!\n\n# Example borrowed from http://ramiro.org/notebook/geopandas-choropleth/\ncmap='OrRd'\nfigsize = (16, 5)\nax = merged_df_2016.plot(column='2016', cmap=cmap, figsize=figsize,legend=True)\ntitle = 'Electricity Access(% of population) in {}'.format('2016')\ngdf[~gdf.ADM0_A3.isin(merged_df_2016.ADM0_A3)].plot(ax=ax, color='#fffafa', hatch='///')\nax.set_title(title, fontdict={'fontsize': 15}, loc='left')\nax.set_axis_off()\n\n\n\n\n\n\n\n\n\n\n\nCreating animation for access across time\n\n!mkdir -p elec_access\n\n\ndef save_png_year(year, path=\"elec_access\"):\n df_year = df_elec.unstack()[['{}'.format(year)]].dropna()\n codes = [countries.get(country, 'Unknown code') for country in df_year.index]\n df_year['Code'] = codes\n merged_df_year = gpd.GeoDataFrame(pd.merge(gdf, df_year, left_on='ADM0_A3', right_on='Code'))\n figsize = (16, 5)\n ax = merged_df_year.plot(column='{}'.format(year), cmap=cmap, figsize=figsize,legend=True,vmin=0.0, vmax=100.0)\n title = 'Electricity Access(% of population) in {}'.format(year)\n gdf[~gdf.ADM0_A3.isin(merged_df_year.ADM0_A3)].plot(ax=ax, color='#fffafa', hatch='///')\n ax.set_title(title, fontdict={'fontsize': 15}, loc='left')\n ax.set_axis_off()\n plt.savefig('{}/{}.png'.format(path, year), dpi=300)\n plt.close()\n\n\nfor year in range(1990, 2017):\n save_png_year(year)\n\n\n# Borrowed from http://www.kevinwampler.com/blog/2016/09/10/creating-animated-gifs-using-python.html\ndef create_gifv(input_files, output_base_name, fps):\n import imageio\n output_extensions = [\"gif\"]\n input_filenames = ['elec_access/{}.png'.format(year) for year in range(1990, 2017)]\n\n poster_writer = imageio.get_writer(\"{}.png\".format(output_base_name), mode='i')\n video_writers = [\n imageio.get_writer(\"{}.{}\".format(output_base_name, ext), mode='I', fps=fps)\n for ext in output_extensions]\n\n is_first = True\n for filename in input_filenames:\n img = imageio.imread(filename)\n\n for writer in video_writers:\n writer.append_data(img)\n if is_first:\n poster_writer.append_data(img)\n\n is_first = False\n\n for writer in video_writers + [poster_writer]:\n writer.close()\n\n\ncreate_gifv(\"elec_access/*.png\", \"electricity_access\", 4)\n\n\nAcross Africa and SE Asia, one can clearly see a gradual improvement in access! Hope you had fun reading this post :)" }, { "objectID": "posts/2024-forecast.html", @@ -312,7 +312,7 @@ "href": "posts/2017-04-19-nmf-out-matrix.html", "title": "Out of matrix non-negative matrix factorisation", "section": "", - "text": "I have written a bunch of posts on this blog about non-negative matrix factorisation (NNMF). However, all of them involved the test user to be a part of the matrix that we factorise to learn the latent factors. Is that always the case? Read on to find more!\n\nStandard Problem\nOur goal is given a matrix A, decompose it into two non-negative factors, as follows:\n$ A_{M N} W_{M K} H_{K N} $, such that $ W_{M K} $ and $ H_{K N} $\n\n\n\nOur Problem- Out of matrix factorisation\nImagine that we have trained the model for M-1 users on N movies. Now, the \\(M^{th}\\) user has rated some movies. Do we retrain the model from scratch to consider \\(M^{th}\\) user? This can be a very expensive operation!\n\nInstead, as shown in above figure, we will learn the user factor for the \\(M^{th}\\) user. We can do this the shared movie factor (H) has already been learnt.\nWe can formulate as follows:\n\\[\nA[M,:] = W[M,:]H\n\\]\nTaking transpose both sides\n\\[\nA[M,:]^T = H^T W[M,:]^T\n\\]\nHowever, \\(A[M,:]^T\\) will have missing entries. Thus, we can mask those entries from the calculation as shown below.\n\nThus, we can write\n\\[\nW[M,:]^T = \\mathrm{Least Squares} (H^T[Mask], A[M,:]^T[Mask])\n\\]\nIf instead we want the factors to be non-negative, we can use non-negative least squares instead of usual least squares for this estimation.\n\n\nCode example\nI’ll now present a simple code example to illustrate the procedure.\n\nDefining matrix A\n\nimport numpy as np\nimport pandas as pd\n\nM, N = 20, 10\n\nnp.random.seed(0)\nA_orig = np.abs(np.random.uniform(low=0.0, high=1.0, size=(M,N)))\npd.DataFrame(A_orig).head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.548814\n0.715189\n0.602763\n0.544883\n0.423655\n0.645894\n0.437587\n0.891773\n0.963663\n0.383442\n\n\n1\n0.791725\n0.528895\n0.568045\n0.925597\n0.071036\n0.087129\n0.020218\n0.832620\n0.778157\n0.870012\n\n\n2\n0.978618\n0.799159\n0.461479\n0.780529\n0.118274\n0.639921\n0.143353\n0.944669\n0.521848\n0.414662\n\n\n3\n0.264556\n0.774234\n0.456150\n0.568434\n0.018790\n0.617635\n0.612096\n0.616934\n0.943748\n0.681820\n\n\n4\n0.359508\n0.437032\n0.697631\n0.060225\n0.666767\n0.670638\n0.210383\n0.128926\n0.315428\n0.363711\n\n\n\n\n\n\n\n\n\n\nMasking a few entries\n\nA = A_orig.copy()\nA[0, 0] = np.NAN\nA[3, 1] = np.NAN\nA[6, 3] = np.NAN\n\n# Masking for last user. \nA[19, 2] = np.NAN\nA[19, 7] = np.NAN\n\nWe will be using A2 (first 19 users) matrix for learning the movie factors and the user factors for the 19 users\n\nA2 = A[:-1,:]\nA2.shape\n\n(19, 10)\n\n\n\nA_df = pd.DataFrame(A)\nA_df.head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\nNaN\n0.715189\n0.602763\n0.544883\n0.423655\n0.645894\n0.437587\n0.891773\n0.963663\n0.383442\n\n\n1\n0.791725\n0.528895\n0.568045\n0.925597\n0.071036\n0.087129\n0.020218\n0.832620\n0.778157\n0.870012\n\n\n2\n0.978618\n0.799159\n0.461479\n0.780529\n0.118274\n0.639921\n0.143353\n0.944669\n0.521848\n0.414662\n\n\n3\n0.264556\nNaN\n0.456150\n0.568434\n0.018790\n0.617635\n0.612096\n0.616934\n0.943748\n0.681820\n\n\n4\n0.359508\n0.437032\n0.697631\n0.060225\n0.666767\n0.670638\n0.210383\n0.128926\n0.315428\n0.363711\n\n\n\n\n\n\n\n\n\n\nDefining matrices W and H (learning on M-1 users and N movies)\n\nK = 4\nW = np.abs(np.random.uniform(low=0, high=1, size=(M-1, K)))\nH = np.abs(np.random.uniform(low=0, high=1, size=(K, N)))\nW = np.divide(W, K*W.max())\nH = np.divide(H, K*H.max())\n\n\npd.DataFrame(W).head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n\n\n\n\n0\n0.078709\n0.175784\n0.095359\n0.045339\n\n\n1\n0.006230\n0.016976\n0.171505\n0.114531\n\n\n2\n0.135453\n0.226355\n0.250000\n0.054753\n\n\n3\n0.167387\n0.066473\n0.005213\n0.191444\n\n\n4\n0.080785\n0.096801\n0.148514\n0.209789\n\n\n\n\n\n\n\n\n\npd.DataFrame(H).head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.239700\n0.203498\n0.160529\n0.222617\n0.074611\n0.216164\n0.157328\n0.003370\n0.088415\n0.037721\n\n\n1\n0.250000\n0.121806\n0.126649\n0.162827\n0.093851\n0.034858\n0.209333\n0.048340\n0.130195\n0.057117\n\n\n2\n0.024914\n0.219537\n0.247731\n0.244654\n0.230833\n0.197093\n0.084828\n0.020651\n0.103694\n0.059133\n\n\n3\n0.033735\n0.013604\n0.184756\n0.002910\n0.196210\n0.037417\n0.020248\n0.022815\n0.171121\n0.062477\n\n\n\n\n\n\n\n\n\n\nDefining the cost that we want to minimise\n\ndef cost(A, W, H):\n from numpy import linalg\n WH = np.dot(W, H)\n A_WH = A-WH\n return linalg.norm(A_WH, 'fro')\n\nHowever, since A has missing entries, we have to define the cost in terms of the entries present in A\n\ndef cost(A, W, H):\n from numpy import linalg\n mask = pd.DataFrame(A).notnull().values\n WH = np.dot(W, H)\n WH_mask = WH[mask]\n A_mask = A[mask]\n A_WH_mask = A_mask-WH_mask\n # Since now A_WH_mask is a vector, we use L2 instead of Frobenius norm for matrix\n return linalg.norm(A_WH_mask, 2)\n\nLet us just try to see the cost of the initial set of values of W and H we randomly assigned. Notice, we pass A2!\n\ncost(A2, W, H)\n\n7.2333001567031294\n\n\n\n\nAlternating NNLS procedure\n\nnum_iter = 1000\nnum_display_cost = max(int(num_iter/10), 1)\nfrom scipy.optimize import nnls\n\nfor i in range(num_iter):\n if i%2 ==0:\n # Learn H, given A and W\n for j in range(N):\n mask_rows = pd.Series(A2[:,j]).notnull()\n H[:,j] = nnls(W[mask_rows], A2[:,j][mask_rows])[0]\n else:\n for j in range(M-1):\n mask_rows = pd.Series(A2[j,:]).notnull()\n W[j,:] = nnls(H.transpose()[mask_rows], A2[j,:][mask_rows])[0]\n WH = np.dot(W, H)\n c = cost(A2, W, H)\n if i%num_display_cost==0:\n print i, c\n \n\n0 3.74162948918\n100 2.25416363991\n200 2.25258698617\n300 2.25229707846\n400 2.25131714233\n500 2.24968386447\n600 2.24967129897\n700 2.24965023589\n800 2.24961410381\n900 2.24955008837\n\n\n\nA_pred = pd.DataFrame(np.dot(W, H))\nA_pred.head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.590301\n0.653038\n0.531940\n0.623272\n0.584763\n0.630835\n0.574041\n0.700139\n0.841706\n0.565808\n\n\n1\n0.802724\n0.532299\n0.482430\n1.017968\n0.149923\n0.449312\n0.097775\n0.708727\n0.506595\n0.846219\n\n\n2\n0.764296\n0.563711\n0.527292\n0.905236\n0.306275\n0.505674\n0.223192\n0.705882\n0.604356\n0.757878\n\n\n3\n0.373539\n0.745239\n0.334948\n0.663219\n0.132686\n0.551844\n0.760420\n0.598546\n0.808108\n0.627732\n\n\n4\n0.467623\n0.331457\n0.617263\n0.239449\n0.634455\n0.370041\n0.294412\n0.288539\n0.484822\n0.126945\n\n\n\n\n\n\n\n\n\n\nLearning home factors for \\(M^{th}\\) home\n\nA_m = A[-1,:]\nA_m_transpose = A_m.T\nmask = ~np.isnan(A_m_transpose)\nW_m = nnls(H.T[mask], A_m_transpose[mask])[0]\n\n\nW_m\n\narray([ 0.12248095, 0.20778687, 0.15185613, 0. ])\n\n\n\n\nPredicting for \\(M^{th}\\) home\n\nratings_m_home = np.dot(H.T, W_m)\n\n\nratings_m_home[~mask]\n\narray([ 0.4245947 , 0.57447552])\n\n\n\nA_orig[-1,:][~mask]\n\narray([ 0.18619301, 0.25435648])\n\n\nThere you go, we are able to get ratings for the \\(M^{th}\\) user for the movies that they have not seen. We only trained the model on the other users! Ofcourse, these numbers might not look so impressive. However, this was just a toy example based on random data. In reality, we could expect better results!" + "text": "I have written a bunch of posts on this blog about non-negative matrix factorisation (NNMF). However, all of them involved the test user to be a part of the matrix that we factorise to learn the latent factors. Is that always the case? Read on to find more!\n\nStandard Problem\nOur goal is given a matrix A, decompose it into two non-negative factors, as follows:\n$ A_{M N} W_{M K} H_{K N} $, such that $ W_{M K} $ and $ H_{K N} $\n\n\n\nOur Problem- Out of matrix factorisation\nImagine that we have trained the model for M-1 users on N movies. Now, the \\(M^{th}\\) user has rated some movies. Do we retrain the model from scratch to consider \\(M^{th}\\) user? This can be a very expensive operation!\n\nInstead, as shown in above figure, we will learn the user factor for the \\(M^{th}\\) user. We can do this the shared movie factor (H) has already been learnt.\nWe can formulate as follows:\n\\[\nA[M,:] = W[M,:]H\n\\]\nTaking transpose both sides\n\\[\nA[M,:]^T = H^T W[M,:]^T\n\\]\nHowever, \\(A[M,:]^T\\) will have missing entries. Thus, we can mask those entries from the calculation as shown below.\n\nThus, we can write\n\\[\nW[M,:]^T = \\mathrm{Least Squares} (H^T[Mask], A[M,:]^T[Mask])\n\\]\nIf instead we want the factors to be non-negative, we can use non-negative least squares instead of usual least squares for this estimation.\n\n\nCode example\nI’ll now present a simple code example to illustrate the procedure.\n\nDefining matrix A\n\nimport numpy as np\nimport pandas as pd\n\nM, N = 20, 10\n\nnp.random.seed(0)\nA_orig = np.abs(np.random.uniform(low=0.0, high=1.0, size=(M,N)))\npd.DataFrame(A_orig).head()\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.548814\n0.715189\n0.602763\n0.544883\n0.423655\n0.645894\n0.437587\n0.891773\n0.963663\n0.383442\n\n\n1\n0.791725\n0.528895\n0.568045\n0.925597\n0.071036\n0.087129\n0.020218\n0.832620\n0.778157\n0.870012\n\n\n2\n0.978618\n0.799159\n0.461479\n0.780529\n0.118274\n0.639921\n0.143353\n0.944669\n0.521848\n0.414662\n\n\n3\n0.264556\n0.774234\n0.456150\n0.568434\n0.018790\n0.617635\n0.612096\n0.616934\n0.943748\n0.681820\n\n\n4\n0.359508\n0.437032\n0.697631\n0.060225\n0.666767\n0.670638\n0.210383\n0.128926\n0.315428\n0.363711\n\n\n\n\n\n\n\n\n\nMasking a few entries\n\nA = A_orig.copy()\nA[0, 0] = np.NAN\nA[3, 1] = np.NAN\nA[6, 3] = np.NAN\n\n# Masking for last user. \nA[19, 2] = np.NAN\nA[19, 7] = np.NAN\n\nWe will be using A2 (first 19 users) matrix for learning the movie factors and the user factors for the 19 users\n\nA2 = A[:-1,:]\nA2.shape\n\n(19, 10)\n\n\n\nA_df = pd.DataFrame(A)\nA_df.head()\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\nNaN\n0.715189\n0.602763\n0.544883\n0.423655\n0.645894\n0.437587\n0.891773\n0.963663\n0.383442\n\n\n1\n0.791725\n0.528895\n0.568045\n0.925597\n0.071036\n0.087129\n0.020218\n0.832620\n0.778157\n0.870012\n\n\n2\n0.978618\n0.799159\n0.461479\n0.780529\n0.118274\n0.639921\n0.143353\n0.944669\n0.521848\n0.414662\n\n\n3\n0.264556\nNaN\n0.456150\n0.568434\n0.018790\n0.617635\n0.612096\n0.616934\n0.943748\n0.681820\n\n\n4\n0.359508\n0.437032\n0.697631\n0.060225\n0.666767\n0.670638\n0.210383\n0.128926\n0.315428\n0.363711\n\n\n\n\n\n\n\n\n\nDefining matrices W and H (learning on M-1 users and N movies)\n\nK = 4\nW = np.abs(np.random.uniform(low=0, high=1, size=(M-1, K)))\nH = np.abs(np.random.uniform(low=0, high=1, size=(K, N)))\nW = np.divide(W, K*W.max())\nH = np.divide(H, K*H.max())\n\n\npd.DataFrame(W).head()\n\n\n\n\n\n\n\n0\n1\n2\n3\n\n\n\n\n0\n0.078709\n0.175784\n0.095359\n0.045339\n\n\n1\n0.006230\n0.016976\n0.171505\n0.114531\n\n\n2\n0.135453\n0.226355\n0.250000\n0.054753\n\n\n3\n0.167387\n0.066473\n0.005213\n0.191444\n\n\n4\n0.080785\n0.096801\n0.148514\n0.209789\n\n\n\n\n\n\n\n\npd.DataFrame(H).head()\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.239700\n0.203498\n0.160529\n0.222617\n0.074611\n0.216164\n0.157328\n0.003370\n0.088415\n0.037721\n\n\n1\n0.250000\n0.121806\n0.126649\n0.162827\n0.093851\n0.034858\n0.209333\n0.048340\n0.130195\n0.057117\n\n\n2\n0.024914\n0.219537\n0.247731\n0.244654\n0.230833\n0.197093\n0.084828\n0.020651\n0.103694\n0.059133\n\n\n3\n0.033735\n0.013604\n0.184756\n0.002910\n0.196210\n0.037417\n0.020248\n0.022815\n0.171121\n0.062477\n\n\n\n\n\n\n\n\n\nDefining the cost that we want to minimise\n\ndef cost(A, W, H):\n from numpy import linalg\n WH = np.dot(W, H)\n A_WH = A-WH\n return linalg.norm(A_WH, 'fro')\n\nHowever, since A has missing entries, we have to define the cost in terms of the entries present in A\n\ndef cost(A, W, H):\n from numpy import linalg\n mask = pd.DataFrame(A).notnull().values\n WH = np.dot(W, H)\n WH_mask = WH[mask]\n A_mask = A[mask]\n A_WH_mask = A_mask-WH_mask\n # Since now A_WH_mask is a vector, we use L2 instead of Frobenius norm for matrix\n return linalg.norm(A_WH_mask, 2)\n\nLet us just try to see the cost of the initial set of values of W and H we randomly assigned. Notice, we pass A2!\n\ncost(A2, W, H)\n\n7.2333001567031294\n\n\n\n\nAlternating NNLS procedure\n\nnum_iter = 1000\nnum_display_cost = max(int(num_iter/10), 1)\nfrom scipy.optimize import nnls\n\nfor i in range(num_iter):\n if i%2 ==0:\n # Learn H, given A and W\n for j in range(N):\n mask_rows = pd.Series(A2[:,j]).notnull()\n H[:,j] = nnls(W[mask_rows], A2[:,j][mask_rows])[0]\n else:\n for j in range(M-1):\n mask_rows = pd.Series(A2[j,:]).notnull()\n W[j,:] = nnls(H.transpose()[mask_rows], A2[j,:][mask_rows])[0]\n WH = np.dot(W, H)\n c = cost(A2, W, H)\n if i%num_display_cost==0:\n print i, c\n \n\n0 3.74162948918\n100 2.25416363991\n200 2.25258698617\n300 2.25229707846\n400 2.25131714233\n500 2.24968386447\n600 2.24967129897\n700 2.24965023589\n800 2.24961410381\n900 2.24955008837\n\n\n\nA_pred = pd.DataFrame(np.dot(W, H))\nA_pred.head()\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.590301\n0.653038\n0.531940\n0.623272\n0.584763\n0.630835\n0.574041\n0.700139\n0.841706\n0.565808\n\n\n1\n0.802724\n0.532299\n0.482430\n1.017968\n0.149923\n0.449312\n0.097775\n0.708727\n0.506595\n0.846219\n\n\n2\n0.764296\n0.563711\n0.527292\n0.905236\n0.306275\n0.505674\n0.223192\n0.705882\n0.604356\n0.757878\n\n\n3\n0.373539\n0.745239\n0.334948\n0.663219\n0.132686\n0.551844\n0.760420\n0.598546\n0.808108\n0.627732\n\n\n4\n0.467623\n0.331457\n0.617263\n0.239449\n0.634455\n0.370041\n0.294412\n0.288539\n0.484822\n0.126945\n\n\n\n\n\n\n\n\n\nLearning home factors for \\(M^{th}\\) home\n\nA_m = A[-1,:]\nA_m_transpose = A_m.T\nmask = ~np.isnan(A_m_transpose)\nW_m = nnls(H.T[mask], A_m_transpose[mask])[0]\n\n\nW_m\n\narray([ 0.12248095, 0.20778687, 0.15185613, 0. ])\n\n\n\n\nPredicting for \\(M^{th}\\) home\n\nratings_m_home = np.dot(H.T, W_m)\n\n\nratings_m_home[~mask]\n\narray([ 0.4245947 , 0.57447552])\n\n\n\nA_orig[-1,:][~mask]\n\narray([ 0.18619301, 0.25435648])\n\n\nThere you go, we are able to get ratings for the \\(M^{th}\\) user for the movies that they have not seen. We only trained the model on the other users! Ofcourse, these numbers might not look so impressive. However, this was just a toy example based on random data. In reality, we could expect better results!" }, { "objectID": "posts/2019-08-20-gaussian-processes.html", @@ -382,7 +382,7 @@ "href": "posts/2021-05-31-gan.html#creating-true-distribution", "title": "A programming introduction to GANs", "section": "Creating “true” distribution", - "text": "Creating “true” distribution\nLet us now create some data from the true/known distribution. We will be essentially creating a 2x2 matrix (image) as explained in Luis Serrano’s tutorial. The (0, 0) and (1, 1) position will be a high number between 0.8 and 1 whereas the other two positions (0, 1) and (1, 0) have values between 0 and 0.1\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport mediapy as media\n\n%matplotlib inline\nnp.random.seed(40)\nimport warnings\nwarnings.filterwarnings('ignore')\nimport logging\nimport os\n\nos.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # FATAL\nlogging.getLogger('tensorflow').setLevel(logging.FATAL)\nimport tensorflow as tf \ntf.get_logger().setLevel('ERROR')\n\ntf.random.set_seed(42)\n\n\nSIZE = 5000\nfaces = np.vstack((np.random.uniform(0.8, 1, SIZE), \n np.random.uniform(0., 0.1, SIZE),\n np.random.uniform(0., 0.1, SIZE),\n np.random.uniform(0.8, 1, SIZE))).T\nfaces.shape\n\n(5000, 4)\n\n\n\ndef plot_face(f):\n f_reshape = f.reshape(2, 2)\n plt.imshow(f_reshape, cmap=\"Greys\")\n\n\ndef plot_faces(faces, subset=1):\n images = {\n f'Image={im}': faces[im].reshape(2, 2)\n for im in range(len(faces))[::subset]\n }\n media.show_images(images, border=True, columns=8, height=80, cmap='Greys')\n\nplot_faces(faces, subset=700)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=700\n\n\n\n\n\n\n\n\n\nImage=1400\n\n\n\n\n\n\n\n\n\nImage=2100\n\n\n\n\n\n\n\n\n\nImage=2800\n\n\n\n\n\n\n\n\n\nImage=3500\n\n\n\n\n\n\n\n\n\nImage=4200\n\n\n\n\n\n\n\n\n\nImage=4900\n\n\n\n\n\n\n\n\n\n\n\n\nThe above shows some samples drawn from the true distibution. Let us also now create some random/noisy samples. These samples do not have any relationship between the 4 positions.\n\n# Examples of noisy images\nnoise = np.random.randn(40, 4)\nnoise = np.abs(noise)\nnoise = noise/noise.max()\n\n\nplot_faces(noise)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19\n\n\n\n\n\n\n\n\n\nImage=20\n\n\n\n\n\n\n\n\n\nImage=21\n\n\n\n\n\n\n\n\n\nImage=22\n\n\n\n\n\n\n\n\n\nImage=23\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=24\n\n\n\n\n\n\n\n\n\nImage=25\n\n\n\n\n\n\n\n\n\nImage=26\n\n\n\n\n\n\n\n\n\nImage=27\n\n\n\n\n\n\n\n\n\nImage=28\n\n\n\n\n\n\n\n\n\nImage=29\n\n\n\n\n\n\n\n\n\nImage=30\n\n\n\n\n\n\n\n\n\nImage=31\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=32\n\n\n\n\n\n\n\n\n\nImage=33\n\n\n\n\n\n\n\n\n\nImage=34\n\n\n\n\n\n\n\n\n\nImage=35\n\n\n\n\n\n\n\n\n\nImage=36\n\n\n\n\n\n\n\n\n\nImage=37\n\n\n\n\n\n\n\n\n\nImage=38\n\n\n\n\n\n\n\n\n\nImage=39" + "text": "Creating “true” distribution\nLet us now create some data from the true/known distribution. We will be essentially creating a 2x2 matrix (image) as explained in Luis Serrano’s tutorial. The (0, 0) and (1, 1) position will be a high number between 0.8 and 1 whereas the other two positions (0, 1) and (1, 0) have values between 0 and 0.1\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport mediapy as media\n\n%matplotlib inline\nnp.random.seed(40)\nimport warnings\nwarnings.filterwarnings('ignore')\nimport logging\nimport os\n\nos.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # FATAL\nlogging.getLogger('tensorflow').setLevel(logging.FATAL)\nimport tensorflow as tf \ntf.get_logger().setLevel('ERROR')\n\ntf.random.set_seed(42)\n\n\nSIZE = 5000\nfaces = np.vstack((np.random.uniform(0.8, 1, SIZE), \n np.random.uniform(0., 0.1, SIZE),\n np.random.uniform(0., 0.1, SIZE),\n np.random.uniform(0.8, 1, SIZE))).T\nfaces.shape\n\n(5000, 4)\n\n\n\ndef plot_face(f):\n f_reshape = f.reshape(2, 2)\n plt.imshow(f_reshape, cmap=\"Greys\")\n\n\ndef plot_faces(faces, subset=1):\n images = {\n f'Image={im}': faces[im].reshape(2, 2)\n for im in range(len(faces))[::subset]\n }\n media.show_images(images, border=True, columns=8, height=80, cmap='Greys')\n\nplot_faces(faces, subset=700)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=700\n\n\n\n\n\n\n\n\n\nImage=1400\n\n\n\n\n\n\n\n\n\nImage=2100\n\n\n\n\n\n\n\n\n\nImage=2800\n\n\n\n\n\n\n\n\n\nImage=3500\n\n\n\n\n\n\n\n\n\nImage=4200\n\n\n\n\n\n\n\n\n\nImage=4900\n\n\n\n\n\n\n\n\n\n\n\nThe above shows some samples drawn from the true distibution. Let us also now create some random/noisy samples. These samples do not have any relationship between the 4 positions.\n\n# Examples of noisy images\nnoise = np.random.randn(40, 4)\nnoise = np.abs(noise)\nnoise = noise/noise.max()\n\n\nplot_faces(noise)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19\n\n\n\n\n\n\n\n\n\nImage=20\n\n\n\n\n\n\n\n\n\nImage=21\n\n\n\n\n\n\n\n\n\nImage=22\n\n\n\n\n\n\n\n\n\nImage=23\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=24\n\n\n\n\n\n\n\n\n\nImage=25\n\n\n\n\n\n\n\n\n\nImage=26\n\n\n\n\n\n\n\n\n\nImage=27\n\n\n\n\n\n\n\n\n\nImage=28\n\n\n\n\n\n\n\n\n\nImage=29\n\n\n\n\n\n\n\n\n\nImage=30\n\n\n\n\n\n\n\n\n\nImage=31\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=32\n\n\n\n\n\n\n\n\n\nImage=33\n\n\n\n\n\n\n\n\n\nImage=34\n\n\n\n\n\n\n\n\n\nImage=35\n\n\n\n\n\n\n\n\n\nImage=36\n\n\n\n\n\n\n\n\n\nImage=37\n\n\n\n\n\n\n\n\n\nImage=38\n\n\n\n\n\n\n\n\n\nImage=39" }, { "objectID": "posts/2021-05-31-gan.html#creating-the-discriminator", @@ -403,14 +403,14 @@ "href": "posts/2021-05-31-gan.html#generating-samples-from-generator", "title": "A programming introduction to GANs", "section": "Generating samples from Generator", - "text": "Generating samples from Generator\nWe can now use our generator to generate some samples and plot them.\n\ndef gen_fake(n_samples):\n x_input = np.random.randn(n_samples, 1)\n X = generator.predict(x_input)\n y = np.zeros((n_samples, 1))\n return X, y\n\nAs expected, the samples look random, without any specific pattern and do not resemble the training data as our generator is untrained. Further, it is important to reiterate that the class associated with the fake samples generated from the generator is 0. Thus, we have the line np.zeros((n_samples, 1)) in the code above.\n\nplot_faces(gen_fake(20)[0])\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19" + "text": "Generating samples from Generator\nWe can now use our generator to generate some samples and plot them.\n\ndef gen_fake(n_samples):\n x_input = np.random.randn(n_samples, 1)\n X = generator.predict(x_input)\n y = np.zeros((n_samples, 1))\n return X, y\n\nAs expected, the samples look random, without any specific pattern and do not resemble the training data as our generator is untrained. Further, it is important to reiterate that the class associated with the fake samples generated from the generator is 0. Thus, we have the line np.zeros((n_samples, 1)) in the code above.\n\nplot_faces(gen_fake(20)[0])\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19" }, { "objectID": "posts/2021-05-31-gan.html#sampling-from-the-real-train-dataset", "href": "posts/2021-05-31-gan.html#sampling-from-the-real-train-dataset", "title": "A programming introduction to GANs", "section": "Sampling from the Real (Train) Dataset", - "text": "Sampling from the Real (Train) Dataset\n\ndef gen_real(n_samples):\n ix = np.random.randint(0, faces.shape[0], n_samples)\n X = faces[ix]\n y = np.ones((n_samples, 1))\n return X, y\n\n\nplot_faces(gen_real(20)[0])\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19\n\n\n\n\n\n\n\n\n\n\n\n\nWe can clearly see the pattern in the images coming from the training dataset." + "text": "Sampling from the Real (Train) Dataset\n\ndef gen_real(n_samples):\n ix = np.random.randint(0, faces.shape[0], n_samples)\n X = faces[ix]\n y = np.ones((n_samples, 1))\n return X, y\n\n\nplot_faces(gen_real(20)[0])\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19\n\n\n\n\n\n\n\n\n\n\n\nWe can clearly see the pattern in the images coming from the training dataset." }, { "objectID": "posts/2021-05-31-gan.html#training-the-gan", @@ -431,21 +431,21 @@ "href": "posts/2021-05-31-gan.html#generating-some-fake-images-from-the-trained-generator", "title": "A programming introduction to GANs", "section": "Generating some “fake” images from the trained generator", - "text": "Generating some “fake” images from the trained generator\n\nplot_faces(gen_fake(20)[0])\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19\n\n\n\n\n\n\n\n\n\n\n\n\nYou could not tell, right! The generator has been trained well!" + "text": "Generating some “fake” images from the trained generator\n\nplot_faces(gen_fake(20)[0])\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=0\n\n\n\n\n\n\n\n\n\nImage=1\n\n\n\n\n\n\n\n\n\nImage=2\n\n\n\n\n\n\n\n\n\nImage=3\n\n\n\n\n\n\n\n\n\nImage=4\n\n\n\n\n\n\n\n\n\nImage=5\n\n\n\n\n\n\n\n\n\nImage=6\n\n\n\n\n\n\n\n\n\nImage=7\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=8\n\n\n\n\n\n\n\n\n\nImage=9\n\n\n\n\n\n\n\n\n\nImage=10\n\n\n\n\n\n\n\n\n\nImage=11\n\n\n\n\n\n\n\n\n\nImage=12\n\n\n\n\n\n\n\n\n\nImage=13\n\n\n\n\n\n\n\n\n\nImage=14\n\n\n\n\n\n\n\n\n\nImage=15\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nImage=16\n\n\n\n\n\n\n\n\n\nImage=17\n\n\n\n\n\n\n\n\n\nImage=18\n\n\n\n\n\n\n\n\n\nImage=19\n\n\n\n\n\n\n\n\n\n\n\nYou could not tell, right! The generator has been trained well!" }, { "objectID": "posts/2021-05-31-gan.html#visualising-evolution-of-generator", "href": "posts/2021-05-31-gan.html#visualising-evolution-of-generator", "title": "A programming introduction to GANs", "section": "Visualising evolution of generator", - "text": "Visualising evolution of generator\nLet us now visualise the evolution of the generator. To do so, we use the already saved generator models at different iterations and feed them the same “random” input.\n\no = {}\nfor i in range(0, N_ITER, STEP):\n for inp in [0., 0.2, 0.4, 0.6, 1.]:\n o[f'It:{i}-Inp:{inp}'] = load_model(f\"models/gen-{i}\").predict(np.array([inp])).reshape(2, 2)\n\n\nmedia.show_images(o, border=True, columns=5, height=80, cmap='Greys')\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:0-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:100-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:200-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:300-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:400-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:500-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:600-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:700-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:800-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:900-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\nWe can see above the improvement of the generation over the different iterations and different inputs! That is it for this article. Happing GANning." + "text": "Visualising evolution of generator\nLet us now visualise the evolution of the generator. To do so, we use the already saved generator models at different iterations and feed them the same “random” input.\n\no = {}\nfor i in range(0, N_ITER, STEP):\n for inp in [0., 0.2, 0.4, 0.6, 1.]:\n o[f'It:{i}-Inp:{inp}'] = load_model(f\"models/gen-{i}\").predict(np.array([inp])).reshape(2, 2)\n\n\nmedia.show_images(o, border=True, columns=5, height=80, cmap='Greys')\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:0-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:0-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:100-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:100-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:200-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:200-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:300-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:300-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:400-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:400-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:500-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:500-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:600-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:600-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:700-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:700-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:800-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:800-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.0\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.2\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.4\n\n\n\n\n\n\n\n\n\nIt:900-Inp:0.6\n\n\n\n\n\n\n\n\n\nIt:900-Inp:1.0\n\n\n\n\n\n\n\n\n\n\n\nWe can see above the improvement of the generation over the different iterations and different inputs! That is it for this article. Happing GANning." }, { "objectID": "posts/2017-08-13-mf-autograd-adagrad.html", "href": "posts/2017-08-13-mf-autograd-adagrad.html", "title": "Adagrad based matrix factorization", "section": "", - "text": "In a previous post, we had seen how to perfom non-negative matrix factorization (NNMF) using Tensorflow. In another previous post, I had shown how to use Adagrad for linear regression. This current post can be considered an extension of the linear regression using Adagrad post. Just for the purpose of education, I’ll poorly initialise the estimate of one of the decomposed matrix, to see how well Adagrad can adjust weights!\n\nCustomary imports\n\nimport autograd.numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import gridspec\n\n%matplotlib inline\n\n\n\nCreating the matrix to be decomposed\n\nA = np.array([[3, 4, 5, 2],\n [4, 4, 3, 3],\n [5, 5, 4, 3]], dtype=np.float32).T\n\n\n\nMasking one entry\n\nA[0, 0] = np.NAN\n\n\nA\n\narray([[ nan, 4., 5.],\n [ 4., 4., 5.],\n [ 5., 3., 4.],\n [ 2., 3., 3.]], dtype=float32)\n\n\n\n\nDefining the cost function\n\ndef cost(param_list):\n W, H = param_list\n pred = np.dot(W, H)\n mask = ~np.isnan(A)\n return np.sqrt(((pred - A)[mask].flatten() ** 2).mean(axis=None))\n\n\n\nDecomposition params\n\nrank = 2\nlearning_rate=0.01\nn_steps = 10000\n\n\n\nAdagrad routine\n\ndef adagrad_gd(param_init, cost, niter=5, lr=1e-2, eps=1e-8, random_seed=0):\n \"\"\"\n param_init: List of initial values of parameters\n cost: cost function\n niter: Number of iterations to run\n lr: Learning rate\n eps: Fudge factor, to avoid division by zero\n \"\"\"\n from copy import deepcopy\n from autograd import grad\n # Fixing the random_seed\n np.random.seed(random_seed)\n \n # Function to compute the gradient of the cost function\n grad_cost = grad(cost)\n params = deepcopy(param_init)\n param_array, grad_array, lr_array, cost_array = [params], [], [[lr*np.ones_like(_) for _ in params]], [cost(params)]\n # Initialising sum of squares of gradients for each param as 0\n sum_squares_gradients = [np.zeros_like(param) for param in params]\n for i in range(niter):\n out_params = []\n gradients = grad_cost(params)\n # At each iteration, we add the square of the gradients to `sum_squares_gradients`\n sum_squares_gradients= [eps + sum_prev + np.square(g) for sum_prev, g in zip(sum_squares_gradients, gradients)]\n # Adapted learning rate for parameter list\n lrs = [np.divide(lr, np.sqrt(sg)) for sg in sum_squares_gradients]\n # Paramter update\n params = [param-(adapted_lr*grad_param) for param, adapted_lr, grad_param in zip(params, lrs, gradients)]\n param_array.append(params)\n lr_array.append(lrs)\n grad_array.append(gradients)\n cost_array.append(cost(params))\n \n return params, param_array, grad_array, lr_array, cost_array\n\n\n\nRunning Adagrad\n\nFixing initial parameters\nI’m poorly initialising H here to see how the learning rates vary for W and H.\n\nnp.random.seed(0)\nshape = A.shape\nH_init = -5*np.abs(np.random.randn(rank, shape[1]))\nW_init = np.abs(np.random.randn(shape[0], rank))\nparam_init = [W_init, H_init]\n\n\nH_init\n\narray([[ -8.82026173, -2.00078604, -4.89368992],\n [-11.204466 , -9.33778995, -4.8863894 ]])\n\n\n\nW_init\n\narray([[ 0.95008842, 0.15135721],\n [ 0.10321885, 0.4105985 ],\n [ 0.14404357, 1.45427351],\n [ 0.76103773, 0.12167502]])\n\n\n\n# Cost for initial set of parameters\ncost(param_init)\n\n11.651268820608442\n\n\n\nlr = 0.1\neps=1e-8\nniter=2000\nada_params, ada_param_array, ada_grad_array, ada_lr_array, ada_cost_array = adagrad_gd(param_init, cost, niter=niter, lr=lr, eps=eps)\n\n\n\nCost v/s # iterations\n\npd.Series(ada_cost_array).plot(logy=True)\nplt.ylabel(\"Cost (log scale)\")\nplt.xlabel(\"# Iterations\")\n\n\n\n\n\n\n\n\n\n\nFinal set of parameters and recovered matrix\n\nW_final, H_final = ada_params\npred = np.dot(W_final, H_final)\npred_df = pd.DataFrame(pred).round()\npred_df\n\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\n0\n5.0\n4.0\n5.0\n\n\n1\n4.0\n4.0\n5.0\n\n\n2\n5.0\n3.0\n4.0\n\n\n3\n2.0\n3.0\n3.0\n\n\n\n\n\n\n\n\n\n\nLearning rate evolution for W\n\nW_lrs = np.array(ada_lr_array)[:, 0]\n\n\nW_lrs = np.array(ada_lr_array)[:, 0]\nfig= plt.figure(figsize=(4, 2))\ngs = gridspec.GridSpec(1, 2, width_ratios=[8, 1]) \nax = plt.subplot(gs[0]), plt.subplot(gs[1])\nmax_W, min_W = np.max([np.max(x) for x in W_lrs]), np.min([np.min(x) for x in W_lrs])\n\ndef update(iteration):\n ax[0].cla()\n ax[1].cla()\n sns.heatmap(W_lrs[iteration], vmin=min_W, vmax=max_W, ax=ax[0], annot=True, fmt='.4f', cbar_ax=ax[1])\n ax[0].set_title(\"Learning rate update for W\\nIteration: {}\".format(iteration))\n fig.tight_layout()\n\nanim = FuncAnimation(fig, update, frames=np.arange(0, 200, 10), interval=500)\nanim.save('W_update.gif', dpi=80, writer='imagemagick')\nplt.close()\n\n\n\n\nLearning rate evolution for H\n\nH_lrs = np.array(ada_lr_array)[:, 1]\n\nfig= plt.figure(figsize=(4, 2))\ngs = gridspec.GridSpec(1, 2, width_ratios=[10, 1]) \nax = plt.subplot(gs[0]), plt.subplot(gs[1])\nmax_H, min_H = np.max([np.max(x) for x in H_lrs]), np.min([np.min(x) for x in H_lrs])\n\ndef update(iteration):\n ax[0].cla()\n ax[1].cla()\n sns.heatmap(H_lrs[iteration], vmin=min_H, vmax=max_H, ax=ax[0], annot=True, fmt='.2f', cbar_ax=ax[1])\n ax[0].set_title(\"Learning rate update for H\\nIteration: {}\".format(iteration))\n fig.tight_layout()\n\nanim = FuncAnimation(fig, update, frames=np.arange(0, 200, 10), interval=500)\nanim.save('H_update.gif', dpi=80, writer='imagemagick')\nplt.close()" + "text": "In a previous post, we had seen how to perfom non-negative matrix factorization (NNMF) using Tensorflow. In another previous post, I had shown how to use Adagrad for linear regression. This current post can be considered an extension of the linear regression using Adagrad post. Just for the purpose of education, I’ll poorly initialise the estimate of one of the decomposed matrix, to see how well Adagrad can adjust weights!\n\nCustomary imports\n\nimport autograd.numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import gridspec\n\n%matplotlib inline\n\n\n\nCreating the matrix to be decomposed\n\nA = np.array([[3, 4, 5, 2],\n [4, 4, 3, 3],\n [5, 5, 4, 3]], dtype=np.float32).T\n\n\n\nMasking one entry\n\nA[0, 0] = np.NAN\n\n\nA\n\narray([[ nan, 4., 5.],\n [ 4., 4., 5.],\n [ 5., 3., 4.],\n [ 2., 3., 3.]], dtype=float32)\n\n\n\n\nDefining the cost function\n\ndef cost(param_list):\n W, H = param_list\n pred = np.dot(W, H)\n mask = ~np.isnan(A)\n return np.sqrt(((pred - A)[mask].flatten() ** 2).mean(axis=None))\n\n\n\nDecomposition params\n\nrank = 2\nlearning_rate=0.01\nn_steps = 10000\n\n\n\nAdagrad routine\n\ndef adagrad_gd(param_init, cost, niter=5, lr=1e-2, eps=1e-8, random_seed=0):\n \"\"\"\n param_init: List of initial values of parameters\n cost: cost function\n niter: Number of iterations to run\n lr: Learning rate\n eps: Fudge factor, to avoid division by zero\n \"\"\"\n from copy import deepcopy\n from autograd import grad\n # Fixing the random_seed\n np.random.seed(random_seed)\n \n # Function to compute the gradient of the cost function\n grad_cost = grad(cost)\n params = deepcopy(param_init)\n param_array, grad_array, lr_array, cost_array = [params], [], [[lr*np.ones_like(_) for _ in params]], [cost(params)]\n # Initialising sum of squares of gradients for each param as 0\n sum_squares_gradients = [np.zeros_like(param) for param in params]\n for i in range(niter):\n out_params = []\n gradients = grad_cost(params)\n # At each iteration, we add the square of the gradients to `sum_squares_gradients`\n sum_squares_gradients= [eps + sum_prev + np.square(g) for sum_prev, g in zip(sum_squares_gradients, gradients)]\n # Adapted learning rate for parameter list\n lrs = [np.divide(lr, np.sqrt(sg)) for sg in sum_squares_gradients]\n # Paramter update\n params = [param-(adapted_lr*grad_param) for param, adapted_lr, grad_param in zip(params, lrs, gradients)]\n param_array.append(params)\n lr_array.append(lrs)\n grad_array.append(gradients)\n cost_array.append(cost(params))\n \n return params, param_array, grad_array, lr_array, cost_array\n\n\n\nRunning Adagrad\n\nFixing initial parameters\nI’m poorly initialising H here to see how the learning rates vary for W and H.\n\nnp.random.seed(0)\nshape = A.shape\nH_init = -5*np.abs(np.random.randn(rank, shape[1]))\nW_init = np.abs(np.random.randn(shape[0], rank))\nparam_init = [W_init, H_init]\n\n\nH_init\n\narray([[ -8.82026173, -2.00078604, -4.89368992],\n [-11.204466 , -9.33778995, -4.8863894 ]])\n\n\n\nW_init\n\narray([[ 0.95008842, 0.15135721],\n [ 0.10321885, 0.4105985 ],\n [ 0.14404357, 1.45427351],\n [ 0.76103773, 0.12167502]])\n\n\n\n# Cost for initial set of parameters\ncost(param_init)\n\n11.651268820608442\n\n\n\nlr = 0.1\neps=1e-8\nniter=2000\nada_params, ada_param_array, ada_grad_array, ada_lr_array, ada_cost_array = adagrad_gd(param_init, cost, niter=niter, lr=lr, eps=eps)\n\n\n\nCost v/s # iterations\n\npd.Series(ada_cost_array).plot(logy=True)\nplt.ylabel(\"Cost (log scale)\")\nplt.xlabel(\"# Iterations\")\n\n\n\n\n\n\n\n\n\n\nFinal set of parameters and recovered matrix\n\nW_final, H_final = ada_params\npred = np.dot(W_final, H_final)\npred_df = pd.DataFrame(pred).round()\npred_df\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\n0\n5.0\n4.0\n5.0\n\n\n1\n4.0\n4.0\n5.0\n\n\n2\n5.0\n3.0\n4.0\n\n\n3\n2.0\n3.0\n3.0\n\n\n\n\n\n\n\n\n\nLearning rate evolution for W\n\nW_lrs = np.array(ada_lr_array)[:, 0]\n\n\nW_lrs = np.array(ada_lr_array)[:, 0]\nfig= plt.figure(figsize=(4, 2))\ngs = gridspec.GridSpec(1, 2, width_ratios=[8, 1]) \nax = plt.subplot(gs[0]), plt.subplot(gs[1])\nmax_W, min_W = np.max([np.max(x) for x in W_lrs]), np.min([np.min(x) for x in W_lrs])\n\ndef update(iteration):\n ax[0].cla()\n ax[1].cla()\n sns.heatmap(W_lrs[iteration], vmin=min_W, vmax=max_W, ax=ax[0], annot=True, fmt='.4f', cbar_ax=ax[1])\n ax[0].set_title(\"Learning rate update for W\\nIteration: {}\".format(iteration))\n fig.tight_layout()\n\nanim = FuncAnimation(fig, update, frames=np.arange(0, 200, 10), interval=500)\nanim.save('W_update.gif', dpi=80, writer='imagemagick')\nplt.close()\n\n\n\n\nLearning rate evolution for H\n\nH_lrs = np.array(ada_lr_array)[:, 1]\n\nfig= plt.figure(figsize=(4, 2))\ngs = gridspec.GridSpec(1, 2, width_ratios=[10, 1]) \nax = plt.subplot(gs[0]), plt.subplot(gs[1])\nmax_H, min_H = np.max([np.max(x) for x in H_lrs]), np.min([np.min(x) for x in H_lrs])\n\ndef update(iteration):\n ax[0].cla()\n ax[1].cla()\n sns.heatmap(H_lrs[iteration], vmin=min_H, vmax=max_H, ax=ax[0], annot=True, fmt='.2f', cbar_ax=ax[1])\n ax[0].set_title(\"Learning rate update for H\\nIteration: {}\".format(iteration))\n fig.tight_layout()\n\nanim = FuncAnimation(fig, update, frames=np.arange(0, 200, 10), interval=500)\nanim.save('H_update.gif', dpi=80, writer='imagemagick')\nplt.close()" }, { "objectID": "posts/svd.html", @@ -459,7 +459,7 @@ "href": "posts/2022-01-26-tfp-distributions.html", "title": "Testing out some distributions in Tensorflow Probability", "section": "", - "text": "import numpy as np\nimport matplotlib.pyplot as plt\nimport tensorflow as tf\nimport seaborn as sns\nimport tensorflow_probability as tfp\nimport pandas as pd\ntfd = tfp.distributions\nsns.reset_defaults()\nsns.set_context(context='talk',font_scale=1)\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\nUnivariate normal\n\nuv_normal = tfd.Normal(loc=0., scale=1.)\n\n\nuv_normal\n\n<tfp.distributions.Normal 'Normal' batch_shape=[] event_shape=[] dtype=float32>\n\n\n\nsamples = uv_normal.sample(1000)\n\n\nsns.histplot(samples.numpy())\nsns.despine()\n\n\n\n\n\n\n\n\n\nsns.displot(samples.numpy(), kind='kde')\n\n\n\n\n\n\n\n\n\nuv_normal_dict_mean = {x: tfd.Normal(loc=x, scale=1.) for x in [-2, -1, 0, 1, 2]}\n\n\nuv_normal_dict_mean_samples = pd.DataFrame({x:uv_normal_dict_mean[x].sample(10000).numpy() \n for x in uv_normal_dict_mean})\n\n\nsns.displot(uv_normal_dict_mean_samples, kind='kde', fill=True)\n\n\n\n\n\n\n\n\n\nuv_normal_dict_var = {x: tfd.Normal(loc=0, scale=x) for x in [1, 2, 5, 10]}\nuv_normal_dict_var_samples = pd.DataFrame({x:uv_normal_dict_var[x].sample(10000).numpy() \n for x in uv_normal_dict_var})\n\n\nsns.displot(uv_normal_dict_var_samples, kind='kde', fill=True)\n\n\n\n\n\n\n\n\n\n\nUsing batches\n\nvar_dfs = pd.DataFrame(\n tfd.Normal(loc=[0., 0., 0., 0.],\n scale=[1., 2., 5., 10.]).sample(10000).numpy())\nvar_dfs.columns = [1, 2, 5, 10]\nsns.displot(var_dfs, kind='kde', fill=True)\n\n\n\n\n\n\n\n\n\ntfd.Normal(loc=[0., 0., 0., 0.],\n scale=[1., 2., 5., 10.])\n\n<tfp.distributions.Normal 'Normal' batch_shape=[4] event_shape=[] dtype=float32>\n\n\n\nsamples = uv_normal.sample(10000)\nsns.displot(samples.numpy(), kind='kde')\nplt.axvline(0.5, color='k', linestyle='--')\npdf_05 = uv_normal.prob(0.5).numpy()\nlog_pdf_05 = uv_normal.log_prob(0.5).numpy()\n\n\nplt.title(\"Density at x = 0.5 is {:.2f}\\n Logprob at x = 0.5 is {:.2f}\".format(pdf_05, log_pdf_05))\n\nText(0.5, 1.0, 'Density at x = 0.5 is 0.35\\n Logprob at x = 0.5 is -1.04')\n\n\n\n\n\n\n\n\n\n\n\nLearning parameters\nLet us generate some normally distributed data and see if we can learn the mean.\n\ntrain_data = uv_normal.sample(10000)\n\n\nuv_normal.loc, uv_normal.scale\n\n(<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,\n <tf.Tensor: shape=(), dtype=float32, numpy=1.0>)\n\n\nLet us create a new TFP trainable distribution where we wish to learn the mean.\n\nto_train = tfd.Normal(loc = tf.Variable(-1., name='loc'), scale = 1.)\n\n\nto_train\n\n<tfp.distributions.Normal 'Normal' batch_shape=[] event_shape=[] dtype=float32>\n\n\n\nto_train.trainable_variables\n\n(<tf.Variable 'loc:0' shape=() dtype=float32, numpy=-1.0>,)\n\n\n\ntf.reduce_mean(train_data), tf.math.reduce_variance(train_data)\n\n(<tf.Tensor: shape=(), dtype=float32, numpy=-0.024403999>,\n <tf.Tensor: shape=(), dtype=float32, numpy=0.9995617>)\n\n\n\ndef nll(train):\n return -tf.reduce_mean(to_train.log_prob(train))\n\n\nnll(train_data)\n\n<tf.Tensor: shape=(), dtype=float32, numpy=1.8946133>\n\n\n\ndef get_loss_and_grads(train):\n with tf.GradientTape() as tape:\n tape.watch(to_train.trainable_variables)\n loss = nll(train)\n grads = tape.gradient(loss, to_train.trainable_variables)\n return loss, grads\n\n\nget_loss_and_grads(train_data)\n\n(<tf.Tensor: shape=(), dtype=float32, numpy=1.8946133>,\n (<tf.Tensor: shape=(), dtype=float32, numpy=-0.97559595>,))\n\n\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n\n\noptimizer\n\n<keras.optimizer_v2.adam.Adam at 0x7f94c97ae490>\n\n\n\niterations = 500\nlosses = np.empty(iterations)\nvals = np.empty(iterations)\nfor i in range(iterations):\n loss, grads = get_loss_and_grads(train_data)\n losses[i] = loss\n vals[i] = to_train.trainable_variables[0].numpy()\n optimizer.apply_gradients(zip(grads, to_train.trainable_variables))\n if i%50 == 0:\n print(i, loss.numpy())\n\n0 1.8946133\n50 1.5505791\n100 1.4401271\n150 1.4205703\n200 1.4187955\n250 1.4187206\n300 1.4187194\n350 1.4187193\n400 1.4187193\n450 1.4187194\n\n\n\nplt.plot(losses)\nsns.despine()\nplt.xlabel(\"Iterations\")\nplt.ylabel(\"Loss\")\n\nText(0, 0.5, 'Loss')\n\n\n\n\n\n\n\n\n\n\nplt.plot(vals)\nsns.despine()\nplt.xlabel(\"Iterations\")\nplt.ylabel(r\"Value of $\\hat{\\mu}$\")\n\nText(0, 0.5, 'Value of $\\\\hat{\\\\mu}$')\n\n\n\n\n\n\n\n\n\n\nto_train_mean_var = tfd.Normal(loc = tf.Variable(-1., name='loc'), scale = tf.Variable(10., name='scale'))\n\ndef nll(train):\n return -tf.reduce_mean(to_train_mean_var.log_prob(train))\n\ndef get_loss_and_grads(train):\n with tf.GradientTape() as tape:\n tape.watch(to_train_mean_var.trainable_variables)\n loss = nll(train)\n grads = tape.gradient(loss, to_train_mean_var.trainable_variables)\n return loss, grads\n\nto_train_mean_var.trainable_variables\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n\niterations = 1000\nlosses = np.empty(iterations)\nvals_scale = np.empty(iterations)\nvals_means = np.empty(iterations)\nfor i in range(iterations):\n loss, grads = get_loss_and_grads(train_data)\n losses[i] = loss\n vals_means[i] = to_train_mean_var.trainable_variables[0].numpy()\n vals_scale[i] = to_train_mean_var.trainable_variables[1].numpy()\n\n\n optimizer.apply_gradients(zip(grads, to_train_mean_var.trainable_variables))\n if i%50 == 0:\n print(i, loss.numpy())\n\n0 3.2312806\n50 3.1768403\n100 3.1204312\n150 3.0602157\n200 2.9945102\n250 2.9219644\n300 2.8410006\n350 2.749461\n400 2.6442661\n450 2.5208094\n500 2.3718355\n550 2.1852348\n600 1.9403238\n650 1.6161448\n700 1.4188237\n750 1.4187355\n800 1.4187193\n850 1.4187193\n900 1.4187193\n950 1.4187193\n\n\n\nplt.plot(losses)\nsns.despine()\nplt.xlabel(\"Iterations\")\nplt.ylabel(\"Loss\")\n\nText(0, 0.5, 'Loss')\n\n\n\n\n\n\n\n\n\n\ndf = pd.DataFrame({\"Mean\":vals_means, \"Scale\":vals_scale}, index=range(iterations))\ndf.index.name = 'Iteration'\n\n\ndf.plot(alpha=1)\nsns.despine()\nplt.axhline(0, linestyle='--', lw = 4, label = 'True mean', alpha=0.5, color='purple')\nplt.axhline(1, linestyle='--', lw = 4, label = 'True scale', alpha=0.5, color='red')\nplt.legend()\n\n\n\n\n\n\n\n\n\n\nMultivariate Normal\n\nmv_normal = tfd.MultivariateNormalFullCovariance(loc=[0, 0], covariance_matrix=[[1, 0.5], [0.5, 2]])\n\n\nmv_data = pd.DataFrame(mv_normal.sample(10000).numpy())\nmv_data.columns = [r'$x_1$', r'$x_2$']\n\n\nmv_normal.prob([0, 0])\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.120309845>\n\n\n\nfrom mpl_toolkits.mplot3d import Axes3D\nfrom matplotlib import cm\n\n\ndef make_pdf_2d_gaussian(mu, sigma):\n N = 60\n X = np.linspace(-3, 3, N)\n Y = np.linspace(-3, 4, N)\n X, Y = np.meshgrid(X, Y)\n\n # Pack X and Y into a single 3-dimensional array\n pos = np.empty(X.shape + (2,))\n pos[:, :, 0] = X\n pos[:, :, 1] = Y\n\n F = tfd.MultivariateNormalFullCovariance(loc=mu, covariance_matrix=sigma)\n Z = F.prob(pos)\n\n plt.contourf(X, Y, Z, cmap=cm.Purples)\n sns.despine() \n plt.xlabel(r\"$x_1$\")\n plt.ylabel(r\"$x_2$\")\n plt.gca().set_aspect('equal')\n plt.title(f'$\\mu$ = {mu}\\n $\\Sigma$ = {np.array(sigma)}')\n\n\nmake_pdf_2d_gaussian([0, 0,], [[1, 0.5,], [0.5, 1]])\n\n\n\n\n\n\n\n\n\nmake_pdf_2d_gaussian([0, 0,], [[3, 0.,], [0., 1]])\n\n\n\n\n\n\n\n\n\nsns.jointplot(data=mv_data,\n x=r'$x_1$',y=r'$x_2$',\n alpha=0.1)\n\n\n\n\n\n\n\n\n\nmv_data\n\n\n\n\n\n\n\n\n\n$x_1$\n$x_2$\n\n\n\n\n0\n2.155621\n-0.343866\n\n\n1\n-0.731184\n0.378393\n\n\n2\n0.832593\n-0.459740\n\n\n3\n-0.701200\n-0.249675\n\n\n4\n-0.430790\n-1.694002\n\n\n...\n...\n...\n\n\n9995\n-0.165910\n-0.171243\n\n\n9996\n0.208389\n-1.698432\n\n\n9997\n-0.030418\n0.353905\n\n\n9998\n1.342328\n1.127457\n\n\n9999\n-0.145741\n0.830713\n\n\n\n\n10000 rows × 2 columns" + "text": "import numpy as np\nimport matplotlib.pyplot as plt\nimport tensorflow as tf\nimport seaborn as sns\nimport tensorflow_probability as tfp\nimport pandas as pd\ntfd = tfp.distributions\nsns.reset_defaults()\nsns.set_context(context='talk',font_scale=1)\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\nUnivariate normal\n\nuv_normal = tfd.Normal(loc=0., scale=1.)\n\n\nuv_normal\n\n<tfp.distributions.Normal 'Normal' batch_shape=[] event_shape=[] dtype=float32>\n\n\n\nsamples = uv_normal.sample(1000)\n\n\nsns.histplot(samples.numpy())\nsns.despine()\n\n\n\n\n\n\n\n\n\nsns.displot(samples.numpy(), kind='kde')\n\n\n\n\n\n\n\n\n\nuv_normal_dict_mean = {x: tfd.Normal(loc=x, scale=1.) for x in [-2, -1, 0, 1, 2]}\n\n\nuv_normal_dict_mean_samples = pd.DataFrame({x:uv_normal_dict_mean[x].sample(10000).numpy() \n for x in uv_normal_dict_mean})\n\n\nsns.displot(uv_normal_dict_mean_samples, kind='kde', fill=True)\n\n\n\n\n\n\n\n\n\nuv_normal_dict_var = {x: tfd.Normal(loc=0, scale=x) for x in [1, 2, 5, 10]}\nuv_normal_dict_var_samples = pd.DataFrame({x:uv_normal_dict_var[x].sample(10000).numpy() \n for x in uv_normal_dict_var})\n\n\nsns.displot(uv_normal_dict_var_samples, kind='kde', fill=True)\n\n\n\n\n\n\n\n\n\n\nUsing batches\n\nvar_dfs = pd.DataFrame(\n tfd.Normal(loc=[0., 0., 0., 0.],\n scale=[1., 2., 5., 10.]).sample(10000).numpy())\nvar_dfs.columns = [1, 2, 5, 10]\nsns.displot(var_dfs, kind='kde', fill=True)\n\n\n\n\n\n\n\n\n\ntfd.Normal(loc=[0., 0., 0., 0.],\n scale=[1., 2., 5., 10.])\n\n<tfp.distributions.Normal 'Normal' batch_shape=[4] event_shape=[] dtype=float32>\n\n\n\nsamples = uv_normal.sample(10000)\nsns.displot(samples.numpy(), kind='kde')\nplt.axvline(0.5, color='k', linestyle='--')\npdf_05 = uv_normal.prob(0.5).numpy()\nlog_pdf_05 = uv_normal.log_prob(0.5).numpy()\n\n\nplt.title(\"Density at x = 0.5 is {:.2f}\\n Logprob at x = 0.5 is {:.2f}\".format(pdf_05, log_pdf_05))\n\nText(0.5, 1.0, 'Density at x = 0.5 is 0.35\\n Logprob at x = 0.5 is -1.04')\n\n\n\n\n\n\n\n\n\n\n\nLearning parameters\nLet us generate some normally distributed data and see if we can learn the mean.\n\ntrain_data = uv_normal.sample(10000)\n\n\nuv_normal.loc, uv_normal.scale\n\n(<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,\n <tf.Tensor: shape=(), dtype=float32, numpy=1.0>)\n\n\nLet us create a new TFP trainable distribution where we wish to learn the mean.\n\nto_train = tfd.Normal(loc = tf.Variable(-1., name='loc'), scale = 1.)\n\n\nto_train\n\n<tfp.distributions.Normal 'Normal' batch_shape=[] event_shape=[] dtype=float32>\n\n\n\nto_train.trainable_variables\n\n(<tf.Variable 'loc:0' shape=() dtype=float32, numpy=-1.0>,)\n\n\n\ntf.reduce_mean(train_data), tf.math.reduce_variance(train_data)\n\n(<tf.Tensor: shape=(), dtype=float32, numpy=-0.024403999>,\n <tf.Tensor: shape=(), dtype=float32, numpy=0.9995617>)\n\n\n\ndef nll(train):\n return -tf.reduce_mean(to_train.log_prob(train))\n\n\nnll(train_data)\n\n<tf.Tensor: shape=(), dtype=float32, numpy=1.8946133>\n\n\n\ndef get_loss_and_grads(train):\n with tf.GradientTape() as tape:\n tape.watch(to_train.trainable_variables)\n loss = nll(train)\n grads = tape.gradient(loss, to_train.trainable_variables)\n return loss, grads\n\n\nget_loss_and_grads(train_data)\n\n(<tf.Tensor: shape=(), dtype=float32, numpy=1.8946133>,\n (<tf.Tensor: shape=(), dtype=float32, numpy=-0.97559595>,))\n\n\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n\n\noptimizer\n\n<keras.optimizer_v2.adam.Adam at 0x7f94c97ae490>\n\n\n\niterations = 500\nlosses = np.empty(iterations)\nvals = np.empty(iterations)\nfor i in range(iterations):\n loss, grads = get_loss_and_grads(train_data)\n losses[i] = loss\n vals[i] = to_train.trainable_variables[0].numpy()\n optimizer.apply_gradients(zip(grads, to_train.trainable_variables))\n if i%50 == 0:\n print(i, loss.numpy())\n\n0 1.8946133\n50 1.5505791\n100 1.4401271\n150 1.4205703\n200 1.4187955\n250 1.4187206\n300 1.4187194\n350 1.4187193\n400 1.4187193\n450 1.4187194\n\n\n\nplt.plot(losses)\nsns.despine()\nplt.xlabel(\"Iterations\")\nplt.ylabel(\"Loss\")\n\nText(0, 0.5, 'Loss')\n\n\n\n\n\n\n\n\n\n\nplt.plot(vals)\nsns.despine()\nplt.xlabel(\"Iterations\")\nplt.ylabel(r\"Value of $\\hat{\\mu}$\")\n\nText(0, 0.5, 'Value of $\\\\hat{\\\\mu}$')\n\n\n\n\n\n\n\n\n\n\nto_train_mean_var = tfd.Normal(loc = tf.Variable(-1., name='loc'), scale = tf.Variable(10., name='scale'))\n\ndef nll(train):\n return -tf.reduce_mean(to_train_mean_var.log_prob(train))\n\ndef get_loss_and_grads(train):\n with tf.GradientTape() as tape:\n tape.watch(to_train_mean_var.trainable_variables)\n loss = nll(train)\n grads = tape.gradient(loss, to_train_mean_var.trainable_variables)\n return loss, grads\n\nto_train_mean_var.trainable_variables\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n\niterations = 1000\nlosses = np.empty(iterations)\nvals_scale = np.empty(iterations)\nvals_means = np.empty(iterations)\nfor i in range(iterations):\n loss, grads = get_loss_and_grads(train_data)\n losses[i] = loss\n vals_means[i] = to_train_mean_var.trainable_variables[0].numpy()\n vals_scale[i] = to_train_mean_var.trainable_variables[1].numpy()\n\n\n optimizer.apply_gradients(zip(grads, to_train_mean_var.trainable_variables))\n if i%50 == 0:\n print(i, loss.numpy())\n\n0 3.2312806\n50 3.1768403\n100 3.1204312\n150 3.0602157\n200 2.9945102\n250 2.9219644\n300 2.8410006\n350 2.749461\n400 2.6442661\n450 2.5208094\n500 2.3718355\n550 2.1852348\n600 1.9403238\n650 1.6161448\n700 1.4188237\n750 1.4187355\n800 1.4187193\n850 1.4187193\n900 1.4187193\n950 1.4187193\n\n\n\nplt.plot(losses)\nsns.despine()\nplt.xlabel(\"Iterations\")\nplt.ylabel(\"Loss\")\n\nText(0, 0.5, 'Loss')\n\n\n\n\n\n\n\n\n\n\ndf = pd.DataFrame({\"Mean\":vals_means, \"Scale\":vals_scale}, index=range(iterations))\ndf.index.name = 'Iteration'\n\n\ndf.plot(alpha=1)\nsns.despine()\nplt.axhline(0, linestyle='--', lw = 4, label = 'True mean', alpha=0.5, color='purple')\nplt.axhline(1, linestyle='--', lw = 4, label = 'True scale', alpha=0.5, color='red')\nplt.legend()\n\n\n\n\n\n\n\n\n\n\nMultivariate Normal\n\nmv_normal = tfd.MultivariateNormalFullCovariance(loc=[0, 0], covariance_matrix=[[1, 0.5], [0.5, 2]])\n\n\nmv_data = pd.DataFrame(mv_normal.sample(10000).numpy())\nmv_data.columns = [r'$x_1$', r'$x_2$']\n\n\nmv_normal.prob([0, 0])\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.120309845>\n\n\n\nfrom mpl_toolkits.mplot3d import Axes3D\nfrom matplotlib import cm\n\n\ndef make_pdf_2d_gaussian(mu, sigma):\n N = 60\n X = np.linspace(-3, 3, N)\n Y = np.linspace(-3, 4, N)\n X, Y = np.meshgrid(X, Y)\n\n # Pack X and Y into a single 3-dimensional array\n pos = np.empty(X.shape + (2,))\n pos[:, :, 0] = X\n pos[:, :, 1] = Y\n\n F = tfd.MultivariateNormalFullCovariance(loc=mu, covariance_matrix=sigma)\n Z = F.prob(pos)\n\n plt.contourf(X, Y, Z, cmap=cm.Purples)\n sns.despine() \n plt.xlabel(r\"$x_1$\")\n plt.ylabel(r\"$x_2$\")\n plt.gca().set_aspect('equal')\n plt.title(f'$\\mu$ = {mu}\\n $\\Sigma$ = {np.array(sigma)}')\n\n\nmake_pdf_2d_gaussian([0, 0,], [[1, 0.5,], [0.5, 1]])\n\n\n\n\n\n\n\n\n\nmake_pdf_2d_gaussian([0, 0,], [[3, 0.,], [0., 1]])\n\n\n\n\n\n\n\n\n\nsns.jointplot(data=mv_data,\n x=r'$x_1$',y=r'$x_2$',\n alpha=0.1)\n\n\n\n\n\n\n\n\n\nmv_data\n\n\n\n\n\n\n\n\n$x_1$\n$x_2$\n\n\n\n\n0\n2.155621\n-0.343866\n\n\n1\n-0.731184\n0.378393\n\n\n2\n0.832593\n-0.459740\n\n\n3\n-0.701200\n-0.249675\n\n\n4\n-0.430790\n-1.694002\n\n\n...\n...\n...\n\n\n9995\n-0.165910\n-0.171243\n\n\n9996\n0.208389\n-1.698432\n\n\n9997\n-0.030418\n0.353905\n\n\n9998\n1.342328\n1.127457\n\n\n9999\n-0.145741\n0.830713\n\n\n\n\n10000 rows × 2 columns" }, { "objectID": "posts/residual-torch.html", @@ -525,25 +525,32 @@ "text": "Basic Imports\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport torch\nimport seaborn as sns\nimport pandas as pd\n\ndist =torch.distributions\n\nsns.reset_defaults()\nsns.set_context(context=\"talk\", font_scale=1)\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\n\nGenerative model for PPCA in PyTorch\n\ndata_dim = 2\nlatent_dim = 1\nnum_datapoints = 100\nz = dist.Normal(\n loc=torch.zeros([latent_dim, num_datapoints]),\n scale=torch.ones([latent_dim, num_datapoints]),)\n\nw = dist.Normal(\n loc=torch.zeros([data_dim, latent_dim]),\n scale=5.0 * torch.ones([data_dim, latent_dim]),\n)\n\n\nw_sample= w.sample()\nz_sample = z.sample()\n\n\nx = dist.Normal(loc = w_sample@z_sample, scale=1)\nx_sample = x.sample([100])\nplt.scatter(x_sample[:, 0], x_sample[:, 1], alpha=0.2, s=30)\n\n\n\n\n\n\n\n\n\n\nGenerative model for PPCA in Pyro\n\nimport pyro.distributions as dist\nimport pyro.distributions.constraints as constraints\nimport pyro\n\npyro.clear_param_store()\n\n\ndef ppca_model(data, latent_dim):\n N, data_dim = data.shape\n W = pyro.sample(\n \"W\",\n dist.Normal(\n loc=torch.zeros([latent_dim, data_dim]),\n scale=5.0 * torch.ones([latent_dim, data_dim]),\n ),\n )\n Z = pyro.sample(\n \"Z\",\n dist.Normal(\n loc=torch.zeros([N, latent_dim]),\n scale=torch.ones([N, latent_dim]),\n ),\n )\n\n mean = Z @ W\n\n return pyro.sample(\"obs\", pyro.distributions.Normal(mean, 1.0), obs=data)\n\n\npyro.render_model(\n ppca_model, model_args=(torch.randn(150, 2), 1), render_distributions=True\n)\n\n\n\n\n\n\n\n\n\nppca_model(x_sample[0], 3).shape\n\ntorch.Size([2, 100])\n\n\n\nfrom pyro import poutine\nwith pyro.plate(\"samples\", 10, dim=-3):\n trace = poutine.trace(ppca_model).get_trace(x_sample[0], 1)\n\n\ntrace.nodes['W']['value'].squeeze()\n\ntorch.Size([10, 100])\n\n\n\ndata_dim = 3\nlatent_dim = 2\n\nW = pyro.sample(\n \"W\",\n dist.Normal(\n loc=torch.zeros([latent_dim, data_dim]),\n scale=5.0 * torch.ones([latent_dim, data_dim]),\n ),\n )\n\n\nN = 150\nZ = pyro.sample(\n \"Z\",\n dist.Normal(\n loc=torch.zeros([N, latent_dim]),\n scale=torch.ones([N, latent_dim]),\n ),\n )\n\n\nZ.shape, W.shape\n\n(torch.Size([150, 2]), torch.Size([2, 3]))\n\n\n\n(Z@W).shape\n\ntorch.Size([150, 3])" }, { - "objectID": "posts/2020-02-20-bayesian-linear-regression.html", - "href": "posts/2020-02-20-bayesian-linear-regression.html", - "title": "Bayesian Linear Regression", + "objectID": "posts/2024-06-10-shortcuts-mac.html", + "href": "posts/2024-06-10-shortcuts-mac.html", + "title": "Keyboard shortcuts on Mac", "section": "", - "text": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n\nx = np.linspace(-1, 1, 50).reshape(-1, 1)\n\n\ny = 5*x + 4 \nnoise = (np.abs(x.flatten())*np.random.randn(len(x))).reshape(-1,1)\ny = y + noise\n\n\nplt.scatter(x, y)\nplt.plot(x, 5*x + 4, 'k')\n\n\n\n\n\n\n\n\n\nfrom scipy.stats import multivariate_normal\nfrom matplotlib import cm\ncov = np.array([[ 1 , 0], [0, 1]])\nvar = multivariate_normal(mean=[0,0], cov=cov)\nx_grid, y_grid = np.mgrid[-1:1:.01, -1:1:.01]\npos = np.dstack((x_grid, y_grid))\nz = var.pdf(pos)\nplt.contourf(x_grid, y_grid, z)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\theta_0$\")\nplt.ylabel(r\"$\\theta_1$\")\nplt.title(r\"Prior distribution of $\\theta = f(\\mu, \\Sigma)$\")\nplt.colorbar()\n\n\n\n\n\n\n\n\n\\[\n\\prod_{i=1}^{n} \\frac{1}{\\sqrt{2 \\pi \\sigma^{2}}} e^{-\\frac{(y_{i}-\\hat{y}_{i})^{2}}{2 \\sigma^{2}}}\n\\]\n\nSample from prior\n\nn_samples = 20\nfor n in range(n_samples):\n theta_0_s, theta_1_s = var.rvs()\n plt.plot(x, theta_1_s*x + theta_0_s, color='k',alpha=0.2)\nplt.scatter(x, y)\n\n\n\n\n\n\n\n\n\n\nLikelihood of theta\n\ndef likelihood(theta_0, theta_1, x, y, sigma):\n s = 0\n x_plus_1 = np.hstack((np.ones_like(x), x))\n\n for i in range(len(x)):\n y_i_hat = x_plus_1[i, :]@np.array([theta_0, theta_1])\n s += (y[i,:]-y_i_hat)**2\n \n \n return np.exp(-s/(2*sigma*sigma))/np.sqrt(2*np.pi*sigma*sigma)\n\n\nlikelihood(-1, 1, x, y, 4)\n\narray([1.00683395e-22])\n\n\n\nx_grid_2, y_grid_2 = np.mgrid[0:8:.1, 0:8:.1]\n\nli = np.zeros_like(x_grid_2)\nfor i in range(x_grid_2.shape[0]):\n for j in range(x_grid_2.shape[1]):\n li[i, j] = likelihood(x_grid_2[i, j], y_grid_2[i, j], x, y, 4)\n \n\n\nplt.contourf(x_grid_2, y_grid_2, li)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\theta_0$\")\nplt.ylabel(r\"$\\theta_1$\")\nplt.colorbar()\nplt.scatter(4, 5, s=200, marker='*', color='r')\nplt.title(r\"Likelihood as a function of ($\\theta_0, \\theta_1$)\")\n\nText(0.5, 1.0, 'Likelihood as a function of ($\\\\theta_0, \\\\theta_1$)')\n\n\n\n\n\n\n\n\n\n\n\nLikelihood of \\(\\sigma^2\\)\n\nx_plus_1 = np.hstack((np.ones_like(x), x))\n\ntheta_mle = np.linalg.inv(x_plus_1.T@x_plus_1)@(x_plus_1.T@y)\nsigma_2_mle = np.linalg.norm(y - x_plus_1@theta_mle)**2\nsigma_mle = np.sqrt(sigma_2_mle)\nsigma_mle\n\n4.128685902124939\n\n\n\n\nPosterior\n\\[\n\\begin{aligned}\np(\\boldsymbol{\\theta} | \\mathcal{X}, \\mathcal{Y}) &=\\mathcal{N}\\left(\\boldsymbol{\\theta} | \\boldsymbol{m}_{N}, \\boldsymbol{S}_{N}\\right) \\\\\n\\boldsymbol{S}_{N} &=\\left(\\boldsymbol{S}_{0}^{-1}+\\sigma^{-2} \\boldsymbol{\\Phi}^{\\top} \\boldsymbol{\\Phi}\\right)^{-1} \\\\\n\\boldsymbol{m}_{N} &=\\boldsymbol{S}_{N}\\left(\\boldsymbol{S}_{0}^{-1} \\boldsymbol{m}_{0}+\\sigma^{-2} \\boldsymbol{\\Phi}^{\\top} \\boldsymbol{y}\\right)\n\\end{aligned}\n\\]\n\nS0 = np.array([[ 1 , 0], [0, 1]])\nM0 = np.array([0, 0])\n\nSN = np.linalg.inv(np.linalg.inv(S0) + (sigma_mle**-2)*x_plus_1.T@x_plus_1)\nMN = SN@(np.linalg.inv(S0)@M0 + (sigma_mle**-2)*(x_plus_1.T@y).squeeze())\n\n\nMN, SN\n\n(array([2.97803341, 2.54277597]), array([[2.54243881e-01, 2.97285330e-17],\n [2.97285330e-17, 4.95625685e-01]]))\n\n\n\nfrom scipy.stats import multivariate_normal\nfrom matplotlib import cm\ncov = np.array([[ 1 , 0], [0, 1]])\nvar_pos = multivariate_normal(mean=MN, cov=SN)\nx_grid, y_grid = np.mgrid[0:8:.1, 0:8:.1]\npos = np.dstack((x_grid, y_grid))\nz = var_pos.pdf(pos)\nplt.contourf(x_grid, y_grid, z)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\theta_0$\")\nplt.ylabel(r\"$\\theta_1$\")\nplt.title(r\"Posterior distribution of $\\theta = f(\\mu, \\Sigma)$\")\nplt.scatter(4, 5, s=200, marker='*', color='r', label='MLE')\nplt.scatter(MN[0], MN[1], s=100, marker='^', color='black', label='MAP')\n\nplt.colorbar()\nplt.legend()\nplt.savefig(\"../images/blr-map.png\")\n\n\n\n\n\n\n\n\nSample from posterior\n\nn_samples = 20\nfor n in range(n_samples):\n theta_0_s, theta_1_s = var_pos.rvs()\n plt.plot(x, theta_1_s*x + theta_0_s, color='k',alpha=0.2)\nplt.scatter(x, y)\n\n\n\n\n\n\n\n\n\n\nPosterior predictions\n\\[\n\\begin{aligned}\np\\left(y_{*} | \\mathcal{X}, \\mathcal{Y}, \\boldsymbol{x}_{*}\\right) &=\\int p\\left(y_{*} | \\boldsymbol{x}_{*}, \\boldsymbol{\\theta}\\right) p(\\boldsymbol{\\theta} | \\mathcal{X}, \\mathcal{Y}) \\mathrm{d} \\boldsymbol{\\theta} \\\\\n&=\\int \\mathcal{N}\\left(y_{*} | \\boldsymbol{\\phi}^{\\top}\\left(\\boldsymbol{x}_{*}\\right) \\boldsymbol{\\theta}, \\sigma^{2}\\right) \\mathcal{N}\\left(\\boldsymbol{\\theta} | \\boldsymbol{m}_{N}, \\boldsymbol{S}_{N}\\right) \\mathrm{d} \\boldsymbol{\\theta} \\\\\n&=\\mathcal{N}\\left(y_{*} | \\boldsymbol{\\phi}^{\\top}\\left(\\boldsymbol{x}_{*}\\right) \\boldsymbol{m}_{N}, \\boldsymbol{\\phi}^{\\top}\\left(\\boldsymbol{x}_{*}\\right) \\boldsymbol{S}_{N} \\boldsymbol{\\phi}\\left(\\boldsymbol{x}_{*}\\right)+\\sigma^{2}\\right)\n\\end{aligned}\n\\]\nFor a point \\(x*\\)\nPredictive mean = \\(X^Tm_N\\)\nPredictive variance = \\(X^TS_NX + \\sigma^2\\)\n\nx_plus_1.T.shape, SN.shape, x_plus_1.shape\n\n((2, 50), (2, 2), (50, 2))\n\n\n\npred_var = x_plus_1@SN@x_plus_1.T\npred_var.shape\n\n(50, 50)\n\n\n\n## Marginal\nindividual_var = pred_var.diagonal()\n\n\ny_hat_map = x_plus_1@MN\n\nplt.plot(x, y_hat_map, color='black')\nplt.fill_between(x.flatten(), y_hat_map-individual_var, y_hat_map+individual_var, alpha=0.2, color='black')\nplt.scatter(x, y)" + "text": "Safari Tab Shortcuts\n\n\n\nAction\nShortcut\n\n\n\n\nGo to first tab\nCommand (⌘) + 1\n\n\nGo to second tab\nCommand (⌘) + 2\n\n\nGo to third tab\nCommand (⌘) + 3\n\n\nGo to fourth tab\nCommand (⌘) + 4\n\n\nGo to fifth tab\nCommand (⌘) + 5\n\n\nGo to sixth tab\nCommand (⌘) + 6\n\n\nGo to seventh tab\nCommand (⌘) + 7\n\n\nGo to eighth tab\nCommand (⌘) + 8\n\n\nGo to last tab\nCommand (⌘) + 9\n\n\nMove to the next tab\nControl (⌃) + Tab\n\n\nMove to the previous tab\nControl (⌃) + Shift (⇧) + Tab" + }, + { + "objectID": "posts/vscode-tips/index.html", + "href": "posts/vscode-tips/index.html", + "title": "VSCode Settings and Tips", + "section": "", + "text": "Most used keyboard shortcuts\n\n\n\nShortcut\nAction\n\n\n\n\nCmd + Shift + P\nOpen command palette\n\n\nCtrl + `\nToggle terminal\n\n\nCmd + B\nToggle sidebar\n\n\nCmd + Ctrl + F\nToggle full screen" }, { "objectID": "index.html", "href": "index.html", "title": "Blog", "section": "", - "text": "Attention\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 30, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nForecasting\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 30, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nRNN\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 30, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAttention in Sequence to Sequence\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 17, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSample\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 7, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nObject detection\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 1, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMixture of Experts\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 18, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLogo\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 2, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBasic Neural Process\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 28, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nUsing a neural network as a covariance function\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 22, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTowards transformers\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 21, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nNaive implementation of Strassen’s algorithm\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 20, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nYouTube video to transcript using openAI whisper and summary using OLLama\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 18, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nReinforcement Learning\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 11, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSuper Resolution using U-Net like architecture\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJul 12, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTrees using NetworkX\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJun 12, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPositional Encoding\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJun 9, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBayesian Active Learning with Disagreement (BALD)\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 26, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nGradient wrt input for a simple model for adversarial attacks\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 5, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPINN\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 5, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSIREN paper implementation\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nApr 28, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSIREN paper\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nApr 27, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nHeteroskedastic and Homoskedastic MLPs in PyTorch for regression\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nApr 10, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTensorboard\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMar 21, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nResidual Connections in PyTorch\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMar 18, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAutoML PyTorch\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 25, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nStacking (Meta Learning)\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 22, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nStacking (Ensemble Learning)\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 21, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMultivariate Taylor Series\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 20, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nComparing GP libraries\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 6, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSVD\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 1, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nConformal Prediction\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 19, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nVSCode Settings and Tips\n\n\n\n\n\n\nML\n\n\nvscode\n\n\n\n\n\n\n\n\n\nJan 17, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBinomial and Poisson distribution\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nNov 20, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSome useful tidibts in sympy\n\n\n\n\n\n\nML\n\n\n\nSome useful tidibts in sympy\n\n\n\n\n\nNov 9, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAutoencoders in JAX\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Autoencoders in JAX\n\n\n\n\n\nNov 4, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCalibration\n\n\n\n\n\n\nml\n\n\n\nProbability Calibration\n\n\n\n\n\nOct 27, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMulti-output Gaussian Process\n\n\n\n\n\n\nml\n\n\n\nMulti-output Gaussian Process\n\n\n\n\n\nOct 25, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nWelcome To My Blog\n\n\n\n\n\n\nnews\n\n\n\n\n\n\n\n\n\nSep 14, 2022\n\n\nTristan O’Malley\n\n\n\n\n\n\n\n\n\n\n\n\nAudio Filtering\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 24, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCoordinate descent failure example\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 21, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPyro Conditioning\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 20, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProbabilstic PCA using PyTorch distributions\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 17, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression using Pyro\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 17, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nDrawing graphical models\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 15, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nGMM learning\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 14, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLogistic Regression using PyTorch distributions\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 14, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nVariational Inference\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 12, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMaximum A-Posteriori (MAP) for parameters of univariate and multivariate normal distribution in PyTorch\n\n\n\n\n\n\nML\n\n\nPyTorch\n\n\n\n\n\n\n\n\n\nFeb 11, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMatrix as transformation and interpreting low rank matrix\n\n\n\n\n\n\nML\n\n\nLA\n\n\n\n\n\n\n\n\n\nFeb 11, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMaximum Likelihood Estimation (MLE) for parameters of univariate and multivariate normal distribution in PyTorch\n\n\n\n\n\n\nML\n\n\nPyTorch\n\n\n\n\n\n\n\n\n\nFeb 9, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAutograd in JAX and PyTorch\n\n\n\n\n\n\nML\n\n\nJAX\n\n\nPyTorch\n\n\n\n\n\n\n\n\n\nFeb 9, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCoin Toss (MLE, MAP, Fully Bayesian) in TF Probability\n\n\n\n\n\n\nML\n\n\nTFP\n\n\nTF\n\n\n\n\n\n\n\n\n\nFeb 7, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSimple Directed Graphical Models in TF Probability\n\n\n\n\n\n\nML\n\n\nTFP\n\n\nTF\n\n\n\n\n\n\n\n\n\nFeb 5, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression in TF Probability using JointDistributionCoroutineAutoBatched\n\n\n\n\n\n\nML\n\n\nTFP\n\n\nTF\n\n\n\n\n\n\n\n\n\nFeb 5, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSampling from univariate and multivariate normal distributions using Box-Muller transform\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 4, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nUnderstanding KL-Divergence\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 29, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression in Tensorflow Probability\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 28, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTesting out some distributions in Tensorflow Probability\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 26, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLearning Gaussian Process regression parameters using mini-batch stochastic gradient descent\n\n\n\n\n\n\nML\n\n\n\nHow to learn the parameters of a GP\n\n\n\n\n\nSep 3, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression from scratch in Julia\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nSep 1, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProbabilistic Programming in Pyro\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nAug 20, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBlurring an image selectively using Affinity Photo\n\n\n\n\n\n\nsetup\n\n\n\nBlurring an image selectively using Affinity Photo\n\n\n\n\n\nJun 19, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nAudio Filtering on the command line and Python\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJun 18, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nRunning Python scripts on server over ssh and getting back content\n\n\n\n\n\n\nsetup\n\n\n\nRunning Python scripts on server over ssh and getting back content\n\n\n\n\n\nJun 17, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nSome of my shortcuts on the iPad\n\n\n\n\n\n\nsetup\n\n\n\nSome of my shortcuts on the iPad\n\n\n\n\n\nJun 16, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nMy iPad Setup\n\n\n\n\n\n\nsetup\n\n\n\nMy iPad computing setup\n\n\n\n\n\nJun 14, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nMy Mac Setup\n\n\n\n\n\n\nsetup\n\n\n\nMy Mac Setup\n\n\n\n\n\nJun 12, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nA programming introduction to GANs\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 31, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nUnderstanding Kernels in Gaussian Processes Regression\n\n\n\n\n\n\nML\n\n\n\nUsing GPy and some interactive visualisations for understanding GPR and applying on a real world data set\n\n\n\n\n\nJun 26, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSampling from common distributions\n\n\n\n\n\n\nML\n\n\n\nFrom the ground up!\n\n\n\n\n\nApr 16, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLearning Gaussian Process regression parameters using gradient descent\n\n\n\n\n\n\nML\n\n\n\nHow to learn the parameters of a GP\n\n\n\n\n\nMar 29, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nActive Learning with Bayesian Linear Regression\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Active Learning with Bayesian Linear Regression.\n\n\n\n\n\nMar 28, 2020\n\n\nZeel Patel & Nipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSome experiments in Gaussian Processes Regression\n\n\n\n\n\n\nML\n\n\n\nUsing GPy and some interactive visualisations for understanding GPR and applying on a real world data set\n\n\n\n\n\nMar 26, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSome Neural Network Classification\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to NNs.\n\n\n\n\n\nMar 8, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nNeural Networks from scratch\n\n\n\n\n\n\nML\n\n\n\nSimple scripts for downloading weather data\n\n\n\n\n\nMar 2, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLearning neural network for XOR\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 28, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBayesian Linear Regression\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Bayesian Linear Regression.\n\n\n\n\n\nFeb 20, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAn Example Markdown Post\n\n\n\n\n\n\nmarkdown\n\n\n\nA minimal example of using markdown with fastpages.\n\n\n\n\n\nJan 14, 2020\n\n\n\n\n\n\n\n\n\n\n\n\nGaussian Processes\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Gaussian Processes.\n\n\n\n\n\nAug 20, 2019\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPlacement-Preparation-2018-1-HashMap\n\n\n\n\n\n\nacademia\n\n\n\nHashMaps for programming interviews\n\n\n\n\n\nAug 18, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nVisualising Electricity Access Over Space and Time\n\n\n\n\n\n\nsustainability\n\n\n\nHow is the world changing over the years!\n\n\n\n\n\nJun 26, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMapping location of air quality sensing in India\n\n\n\n\n\n\nair quality\n\n\n\nAQ sensing in India\n\n\n\n\n\nJun 21, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nActive Learning\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to query by committee strategy for active learning\n\n\n\n\n\nJun 16, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSignal denoising using RNNs in PyTorch\n\n\n\n\n\n\nML\n\n\n\nDenoising\n\n\n\n\n\nJan 13, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCS Ph.D. lessons to my younger self\n\n\n\n\n\n\nacademia\n\n\n\nSome personal reflections..\n\n\n\n\n\nJan 7, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nNeural Networks for Collaborative Filtering\n\n\n\n\n\n\nML\n\n\n\nNeural networks to learn the embeddings! and how to combine them\n\n\n\n\n\nDec 29, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nRecommender Systems in Keras\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to recommender systems using Keras!\n\n\n\n\n\nDec 18, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAdagrad based matrix factorization\n\n\n\n\n\n\nML\n\n\n\nAdagrad optimizer for matrix factorisation\n\n\n\n\n\nAug 13, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProgramatically understanding Adagrad\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nAug 12, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTop 50 ggplot2 Visualizations in Python - Part 1\n\n\n\n\n\n\nvisualisation\n\n\n\nSame graphic using different libraries!\n\n\n\n\n\nAug 2, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear regression with prior (using gradient descent)\n\n\n\n\n\n\nML\n\n\n\nWhat if we start from some prior!\n\n\n\n\n\nJun 15, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nData exploration using widgets in Matplotlib\n\n\n\n\n\n\nvisualisation\n\n\n\nExploring data in Matplotlib\n\n\n\n\n\nJun 14, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nConstrained Non-negative matrix factorisation using CVXPY\n\n\n\n\n\n\nML\n\n\n\nConstrained NMF using CVXPY!\n\n\n\n\n\nApr 21, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nOut of Tensor factorisation\n\n\n\n\n\n\nML\n\n\n\nOut of tensor factorisation\n\n\n\n\n\nApr 20, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nOut of matrix non-negative matrix factorisation\n\n\n\n\n\n\nML\n\n\n\nWhat if we to predict for entries not within the matrix?!\n\n\n\n\n\nApr 19, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCoin tosses and MCMC\n\n\n\n\n\n\nML\n\n\n\nMCMC simulations for coin tosses!\n\n\n\n\n\nJul 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLatexify Matplotlib\n\n\n\n\n\n\nvisualisation\n\n\n\nTowards amazing plots in research papers!\n\n\n\n\n\nJun 2, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProgramatically understanding Expectation Maximization\n\n\n\n\n\n\nML\n\n\n\nMaximize based on what you know, re-estimate!\n\n\n\n\n\nJun 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProgramatically understanding dynamic time warping (DTW)\n\n\n\n\n\n\nML\n\n\n\nSignal processing for unequal time series!\n\n\n\n\n\nMay 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nGibbs sampling\n\n\n\n\n\n\nML\n\n\n\nProgrammatic introduction\n\n\n\n\n\nMay 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nDenoising using Least Squares\n\n\n\n\n\n\nML\n\n\n\nHow the simple least squares can be used in more ways than you thought!\n\n\n\n\n\nSep 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nHMM Simulation for Continuous HMM\n\n\n\n\n\n\nML\n\n\n\nSimulating a continuous HMM\n\n\n\n\n\nJul 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nHMM Simulation for Unfair Casino Problem\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to HMMs for unfair casino problem\n\n\n\n\n\nJun 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAggregation in Timeseries using Pandas\n\n\n\n\n\n\nvisualisation\n\n\n\nPandas excellence in timeseries!\n\n\n\n\n\nMay 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nDownloading weather data\n\n\n\n\n\n\nvisualisation\n\n\n\nSimple scripts for downloading weather data\n\n\n\n\n\nApr 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\nNo matching items" + "text": "Keyboard shortcuts on Mac\n\n\n\n\n\n\nsetup\n\n\n\nKeyboard shortcuts on mac\n\n\n\n\n\nJun 12, 2024\n\n\n\n\n\n\n\n\n\n\n\n\nAttention\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 30, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nForecasting\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 30, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nRNN\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 30, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAttention in Sequence to Sequence\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 17, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSample\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 7, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nObject detection\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 1, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMixture of Experts\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 18, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLogo\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 2, 2024\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBasic Neural Process\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 28, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nUsing a neural network as a covariance function\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 22, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTowards transformers\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 21, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nNaive implementation of Strassen’s algorithm\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 20, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nYouTube video to transcript using openAI whisper and summary using OLLama\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 18, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nReinforcement Learning\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nDec 11, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSuper Resolution using U-Net like architecture\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJul 12, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTrees using NetworkX\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJun 12, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPositional Encoding\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJun 9, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBayesian Active Learning with Disagreement (BALD)\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 26, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nGradient wrt input for a simple model for adversarial attacks\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 5, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPINN\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 5, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSIREN paper implementation\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nApr 28, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSIREN paper\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nApr 27, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nHeteroskedastic and Homoskedastic MLPs in PyTorch for regression\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nApr 10, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTensorboard\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMar 21, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nResidual Connections in PyTorch\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMar 18, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAutoML PyTorch\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 25, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nStacking (Meta Learning)\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 22, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nStacking (Ensemble Learning)\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 21, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMultivariate Taylor Series\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 20, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nComparing GP libraries\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 6, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSVD\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 1, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nConformal Prediction\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 19, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nVSCode Settings and Tips\n\n\n\n\n\n\nML\n\n\nvscode\n\n\n\n\n\n\n\n\n\nJan 17, 2023\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBinomial and Poisson distribution\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nNov 20, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSome useful tidibts in sympy\n\n\n\n\n\n\nML\n\n\n\nSome useful tidibts in sympy\n\n\n\n\n\nNov 9, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAutoencoders in JAX\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Autoencoders in JAX\n\n\n\n\n\nNov 4, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCalibration\n\n\n\n\n\n\nml\n\n\n\nProbability Calibration\n\n\n\n\n\nOct 27, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMulti-output Gaussian Process\n\n\n\n\n\n\nml\n\n\n\nMulti-output Gaussian Process\n\n\n\n\n\nOct 25, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nWelcome To My Blog\n\n\n\n\n\n\nnews\n\n\n\n\n\n\n\n\n\nSep 14, 2022\n\n\nTristan O’Malley\n\n\n\n\n\n\n\n\n\n\n\n\nAudio Filtering\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 24, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCoordinate descent failure example\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 21, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPyro Conditioning\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 20, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProbabilstic PCA using PyTorch distributions\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 17, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression using Pyro\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 17, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nDrawing graphical models\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 15, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nGMM learning\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 14, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLogistic Regression using PyTorch distributions\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 14, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nVariational Inference\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 12, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMaximum A-Posteriori (MAP) for parameters of univariate and multivariate normal distribution in PyTorch\n\n\n\n\n\n\nML\n\n\nPyTorch\n\n\n\n\n\n\n\n\n\nFeb 11, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMatrix as transformation and interpreting low rank matrix\n\n\n\n\n\n\nML\n\n\nLA\n\n\n\n\n\n\n\n\n\nFeb 11, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMaximum Likelihood Estimation (MLE) for parameters of univariate and multivariate normal distribution in PyTorch\n\n\n\n\n\n\nML\n\n\nPyTorch\n\n\n\n\n\n\n\n\n\nFeb 9, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAutograd in JAX and PyTorch\n\n\n\n\n\n\nML\n\n\nJAX\n\n\nPyTorch\n\n\n\n\n\n\n\n\n\nFeb 9, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCoin Toss (MLE, MAP, Fully Bayesian) in TF Probability\n\n\n\n\n\n\nML\n\n\nTFP\n\n\nTF\n\n\n\n\n\n\n\n\n\nFeb 7, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSimple Directed Graphical Models in TF Probability\n\n\n\n\n\n\nML\n\n\nTFP\n\n\nTF\n\n\n\n\n\n\n\n\n\nFeb 5, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression in TF Probability using JointDistributionCoroutineAutoBatched\n\n\n\n\n\n\nML\n\n\nTFP\n\n\nTF\n\n\n\n\n\n\n\n\n\nFeb 5, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSampling from univariate and multivariate normal distributions using Box-Muller transform\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 4, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nUnderstanding KL-Divergence\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 29, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression in Tensorflow Probability\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 28, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTesting out some distributions in Tensorflow Probability\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJan 26, 2022\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLearning Gaussian Process regression parameters using mini-batch stochastic gradient descent\n\n\n\n\n\n\nML\n\n\n\nHow to learn the parameters of a GP\n\n\n\n\n\nSep 3, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear Regression from scratch in Julia\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nSep 1, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProbabilistic Programming in Pyro\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nAug 20, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBlurring an image selectively using Affinity Photo\n\n\n\n\n\n\nsetup\n\n\n\nBlurring an image selectively using Affinity Photo\n\n\n\n\n\nJun 19, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nAudio Filtering on the command line and Python\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nJun 18, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nRunning Python scripts on server over ssh and getting back content\n\n\n\n\n\n\nsetup\n\n\n\nRunning Python scripts on server over ssh and getting back content\n\n\n\n\n\nJun 17, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nSome of my shortcuts on the iPad\n\n\n\n\n\n\nsetup\n\n\n\nSome of my shortcuts on the iPad\n\n\n\n\n\nJun 16, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nMy iPad Setup\n\n\n\n\n\n\nsetup\n\n\n\nMy iPad computing setup\n\n\n\n\n\nJun 14, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nMy Mac Setup\n\n\n\n\n\n\nsetup\n\n\n\nMy Mac Setup\n\n\n\n\n\nJun 12, 2021\n\n\n\n\n\n\n\n\n\n\n\n\nA programming introduction to GANs\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nMay 31, 2021\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nUnderstanding Kernels in Gaussian Processes Regression\n\n\n\n\n\n\nML\n\n\n\nUsing GPy and some interactive visualisations for understanding GPR and applying on a real world data set\n\n\n\n\n\nJun 26, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSampling from common distributions\n\n\n\n\n\n\nML\n\n\n\nFrom the ground up!\n\n\n\n\n\nApr 16, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLearning Gaussian Process regression parameters using gradient descent\n\n\n\n\n\n\nML\n\n\n\nHow to learn the parameters of a GP\n\n\n\n\n\nMar 29, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nActive Learning with Bayesian Linear Regression\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Active Learning with Bayesian Linear Regression.\n\n\n\n\n\nMar 28, 2020\n\n\nZeel Patel & Nipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSome experiments in Gaussian Processes Regression\n\n\n\n\n\n\nML\n\n\n\nUsing GPy and some interactive visualisations for understanding GPR and applying on a real world data set\n\n\n\n\n\nMar 26, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSome Neural Network Classification\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to NNs.\n\n\n\n\n\nMar 8, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nNeural Networks from scratch\n\n\n\n\n\n\nML\n\n\n\nSimple scripts for downloading weather data\n\n\n\n\n\nMar 2, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLearning neural network for XOR\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nFeb 28, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nBayesian Linear Regression\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Bayesian Linear Regression.\n\n\n\n\n\nFeb 20, 2020\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAn Example Markdown Post\n\n\n\n\n\n\nmarkdown\n\n\n\nA minimal example of using markdown with fastpages.\n\n\n\n\n\nJan 14, 2020\n\n\n\n\n\n\n\n\n\n\n\n\nGaussian Processes\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to Gaussian Processes.\n\n\n\n\n\nAug 20, 2019\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nPlacement-Preparation-2018-1-HashMap\n\n\n\n\n\n\nacademia\n\n\n\nHashMaps for programming interviews\n\n\n\n\n\nAug 18, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nVisualising Electricity Access Over Space and Time\n\n\n\n\n\n\nsustainability\n\n\n\nHow is the world changing over the years!\n\n\n\n\n\nJun 26, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nMapping location of air quality sensing in India\n\n\n\n\n\n\nair quality\n\n\n\nAQ sensing in India\n\n\n\n\n\nJun 21, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nActive Learning\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to query by committee strategy for active learning\n\n\n\n\n\nJun 16, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nSignal denoising using RNNs in PyTorch\n\n\n\n\n\n\nML\n\n\n\nDenoising\n\n\n\n\n\nJan 13, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCS Ph.D. lessons to my younger self\n\n\n\n\n\n\nacademia\n\n\n\nSome personal reflections..\n\n\n\n\n\nJan 7, 2018\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nNeural Networks for Collaborative Filtering\n\n\n\n\n\n\nML\n\n\n\nNeural networks to learn the embeddings! and how to combine them\n\n\n\n\n\nDec 29, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nRecommender Systems in Keras\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to recommender systems using Keras!\n\n\n\n\n\nDec 18, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAdagrad based matrix factorization\n\n\n\n\n\n\nML\n\n\n\nAdagrad optimizer for matrix factorisation\n\n\n\n\n\nAug 13, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProgramatically understanding Adagrad\n\n\n\n\n\n\nML\n\n\n\n\n\n\n\n\n\nAug 12, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nTop 50 ggplot2 Visualizations in Python - Part 1\n\n\n\n\n\n\nvisualisation\n\n\n\nSame graphic using different libraries!\n\n\n\n\n\nAug 2, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLinear regression with prior (using gradient descent)\n\n\n\n\n\n\nML\n\n\n\nWhat if we start from some prior!\n\n\n\n\n\nJun 15, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nData exploration using widgets in Matplotlib\n\n\n\n\n\n\nvisualisation\n\n\n\nExploring data in Matplotlib\n\n\n\n\n\nJun 14, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nConstrained Non-negative matrix factorisation using CVXPY\n\n\n\n\n\n\nML\n\n\n\nConstrained NMF using CVXPY!\n\n\n\n\n\nApr 21, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nOut of Tensor factorisation\n\n\n\n\n\n\nML\n\n\n\nOut of tensor factorisation\n\n\n\n\n\nApr 20, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nOut of matrix non-negative matrix factorisation\n\n\n\n\n\n\nML\n\n\n\nWhat if we to predict for entries not within the matrix?!\n\n\n\n\n\nApr 19, 2017\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nCoin tosses and MCMC\n\n\n\n\n\n\nML\n\n\n\nMCMC simulations for coin tosses!\n\n\n\n\n\nJul 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nLatexify Matplotlib\n\n\n\n\n\n\nvisualisation\n\n\n\nTowards amazing plots in research papers!\n\n\n\n\n\nJun 2, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProgramatically understanding Expectation Maximization\n\n\n\n\n\n\nML\n\n\n\nMaximize based on what you know, re-estimate!\n\n\n\n\n\nJun 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nProgramatically understanding dynamic time warping (DTW)\n\n\n\n\n\n\nML\n\n\n\nSignal processing for unequal time series!\n\n\n\n\n\nMay 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nGibbs sampling\n\n\n\n\n\n\nML\n\n\n\nProgrammatic introduction\n\n\n\n\n\nMay 1, 2014\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nDenoising using Least Squares\n\n\n\n\n\n\nML\n\n\n\nHow the simple least squares can be used in more ways than you thought!\n\n\n\n\n\nSep 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nHMM Simulation for Continuous HMM\n\n\n\n\n\n\nML\n\n\n\nSimulating a continuous HMM\n\n\n\n\n\nJul 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nHMM Simulation for Unfair Casino Problem\n\n\n\n\n\n\nML\n\n\n\nA programming introduction to HMMs for unfair casino problem\n\n\n\n\n\nJun 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nAggregation in Timeseries using Pandas\n\n\n\n\n\n\nvisualisation\n\n\n\nPandas excellence in timeseries!\n\n\n\n\n\nMay 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\n\n\n\n\n\n\nDownloading weather data\n\n\n\n\n\n\nvisualisation\n\n\n\nSimple scripts for downloading weather data\n\n\n\n\n\nApr 1, 2013\n\n\nNipun Batra\n\n\n\n\n\n\nNo matching items" }, { - "objectID": "posts/vscode-tips/index.html", - "href": "posts/vscode-tips/index.html", - "title": "VSCode Settings and Tips", + "objectID": "posts/2020-02-20-bayesian-linear-regression.html", + "href": "posts/2020-02-20-bayesian-linear-regression.html", + "title": "Bayesian Linear Regression", "section": "", - "text": "Most used keyboard shortcuts\n\n\n\nShortcut\nAction\n\n\n\n\nCmd + Shift + P\nOpen command palette\n\n\nCtrl + `\nToggle terminal\n\n\nCmd + B\nToggle sidebar\n\n\nCmd + Ctrl + F\nToggle full screen" + "text": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n\nx = np.linspace(-1, 1, 50).reshape(-1, 1)\n\n\ny = 5*x + 4 \nnoise = (np.abs(x.flatten())*np.random.randn(len(x))).reshape(-1,1)\ny = y + noise\n\n\nplt.scatter(x, y)\nplt.plot(x, 5*x + 4, 'k')\n\n\n\n\n\n\n\n\n\nfrom scipy.stats import multivariate_normal\nfrom matplotlib import cm\ncov = np.array([[ 1 , 0], [0, 1]])\nvar = multivariate_normal(mean=[0,0], cov=cov)\nx_grid, y_grid = np.mgrid[-1:1:.01, -1:1:.01]\npos = np.dstack((x_grid, y_grid))\nz = var.pdf(pos)\nplt.contourf(x_grid, y_grid, z)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\theta_0$\")\nplt.ylabel(r\"$\\theta_1$\")\nplt.title(r\"Prior distribution of $\\theta = f(\\mu, \\Sigma)$\")\nplt.colorbar()\n\n\n\n\n\n\n\n\n\\[\n\\prod_{i=1}^{n} \\frac{1}{\\sqrt{2 \\pi \\sigma^{2}}} e^{-\\frac{(y_{i}-\\hat{y}_{i})^{2}}{2 \\sigma^{2}}}\n\\]\n\nSample from prior\n\nn_samples = 20\nfor n in range(n_samples):\n theta_0_s, theta_1_s = var.rvs()\n plt.plot(x, theta_1_s*x + theta_0_s, color='k',alpha=0.2)\nplt.scatter(x, y)\n\n\n\n\n\n\n\n\n\n\nLikelihood of theta\n\ndef likelihood(theta_0, theta_1, x, y, sigma):\n s = 0\n x_plus_1 = np.hstack((np.ones_like(x), x))\n\n for i in range(len(x)):\n y_i_hat = x_plus_1[i, :]@np.array([theta_0, theta_1])\n s += (y[i,:]-y_i_hat)**2\n \n \n return np.exp(-s/(2*sigma*sigma))/np.sqrt(2*np.pi*sigma*sigma)\n\n\nlikelihood(-1, 1, x, y, 4)\n\narray([1.00683395e-22])\n\n\n\nx_grid_2, y_grid_2 = np.mgrid[0:8:.1, 0:8:.1]\n\nli = np.zeros_like(x_grid_2)\nfor i in range(x_grid_2.shape[0]):\n for j in range(x_grid_2.shape[1]):\n li[i, j] = likelihood(x_grid_2[i, j], y_grid_2[i, j], x, y, 4)\n \n\n\nplt.contourf(x_grid_2, y_grid_2, li)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\theta_0$\")\nplt.ylabel(r\"$\\theta_1$\")\nplt.colorbar()\nplt.scatter(4, 5, s=200, marker='*', color='r')\nplt.title(r\"Likelihood as a function of ($\\theta_0, \\theta_1$)\")\n\nText(0.5, 1.0, 'Likelihood as a function of ($\\\\theta_0, \\\\theta_1$)')\n\n\n\n\n\n\n\n\n\n\n\nLikelihood of \\(\\sigma^2\\)\n\nx_plus_1 = np.hstack((np.ones_like(x), x))\n\ntheta_mle = np.linalg.inv(x_plus_1.T@x_plus_1)@(x_plus_1.T@y)\nsigma_2_mle = np.linalg.norm(y - x_plus_1@theta_mle)**2\nsigma_mle = np.sqrt(sigma_2_mle)\nsigma_mle\n\n4.128685902124939\n\n\n\n\nPosterior\n\\[\n\\begin{aligned}\np(\\boldsymbol{\\theta} | \\mathcal{X}, \\mathcal{Y}) &=\\mathcal{N}\\left(\\boldsymbol{\\theta} | \\boldsymbol{m}_{N}, \\boldsymbol{S}_{N}\\right) \\\\\n\\boldsymbol{S}_{N} &=\\left(\\boldsymbol{S}_{0}^{-1}+\\sigma^{-2} \\boldsymbol{\\Phi}^{\\top} \\boldsymbol{\\Phi}\\right)^{-1} \\\\\n\\boldsymbol{m}_{N} &=\\boldsymbol{S}_{N}\\left(\\boldsymbol{S}_{0}^{-1} \\boldsymbol{m}_{0}+\\sigma^{-2} \\boldsymbol{\\Phi}^{\\top} \\boldsymbol{y}\\right)\n\\end{aligned}\n\\]\n\nS0 = np.array([[ 1 , 0], [0, 1]])\nM0 = np.array([0, 0])\n\nSN = np.linalg.inv(np.linalg.inv(S0) + (sigma_mle**-2)*x_plus_1.T@x_plus_1)\nMN = SN@(np.linalg.inv(S0)@M0 + (sigma_mle**-2)*(x_plus_1.T@y).squeeze())\n\n\nMN, SN\n\n(array([2.97803341, 2.54277597]), array([[2.54243881e-01, 2.97285330e-17],\n [2.97285330e-17, 4.95625685e-01]]))\n\n\n\nfrom scipy.stats import multivariate_normal\nfrom matplotlib import cm\ncov = np.array([[ 1 , 0], [0, 1]])\nvar_pos = multivariate_normal(mean=MN, cov=SN)\nx_grid, y_grid = np.mgrid[0:8:.1, 0:8:.1]\npos = np.dstack((x_grid, y_grid))\nz = var_pos.pdf(pos)\nplt.contourf(x_grid, y_grid, z)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\theta_0$\")\nplt.ylabel(r\"$\\theta_1$\")\nplt.title(r\"Posterior distribution of $\\theta = f(\\mu, \\Sigma)$\")\nplt.scatter(4, 5, s=200, marker='*', color='r', label='MLE')\nplt.scatter(MN[0], MN[1], s=100, marker='^', color='black', label='MAP')\n\nplt.colorbar()\nplt.legend()\nplt.savefig(\"../images/blr-map.png\")\n\n\n\n\n\n\n\n\nSample from posterior\n\nn_samples = 20\nfor n in range(n_samples):\n theta_0_s, theta_1_s = var_pos.rvs()\n plt.plot(x, theta_1_s*x + theta_0_s, color='k',alpha=0.2)\nplt.scatter(x, y)\n\n\n\n\n\n\n\n\n\n\nPosterior predictions\n\\[\n\\begin{aligned}\np\\left(y_{*} | \\mathcal{X}, \\mathcal{Y}, \\boldsymbol{x}_{*}\\right) &=\\int p\\left(y_{*} | \\boldsymbol{x}_{*}, \\boldsymbol{\\theta}\\right) p(\\boldsymbol{\\theta} | \\mathcal{X}, \\mathcal{Y}) \\mathrm{d} \\boldsymbol{\\theta} \\\\\n&=\\int \\mathcal{N}\\left(y_{*} | \\boldsymbol{\\phi}^{\\top}\\left(\\boldsymbol{x}_{*}\\right) \\boldsymbol{\\theta}, \\sigma^{2}\\right) \\mathcal{N}\\left(\\boldsymbol{\\theta} | \\boldsymbol{m}_{N}, \\boldsymbol{S}_{N}\\right) \\mathrm{d} \\boldsymbol{\\theta} \\\\\n&=\\mathcal{N}\\left(y_{*} | \\boldsymbol{\\phi}^{\\top}\\left(\\boldsymbol{x}_{*}\\right) \\boldsymbol{m}_{N}, \\boldsymbol{\\phi}^{\\top}\\left(\\boldsymbol{x}_{*}\\right) \\boldsymbol{S}_{N} \\boldsymbol{\\phi}\\left(\\boldsymbol{x}_{*}\\right)+\\sigma^{2}\\right)\n\\end{aligned}\n\\]\nFor a point \\(x*\\)\nPredictive mean = \\(X^Tm_N\\)\nPredictive variance = \\(X^TS_NX + \\sigma^2\\)\n\nx_plus_1.T.shape, SN.shape, x_plus_1.shape\n\n((2, 50), (2, 2), (50, 2))\n\n\n\npred_var = x_plus_1@SN@x_plus_1.T\npred_var.shape\n\n(50, 50)\n\n\n\n## Marginal\nindividual_var = pred_var.diagonal()\n\n\ny_hat_map = x_plus_1@MN\n\nplt.plot(x, y_hat_map, color='black')\nplt.fill_between(x.flatten(), y_hat_map-individual_var, y_hat_map+individual_var, alpha=0.2, color='black')\nplt.scatter(x, y)" }, { "objectID": "posts/2022-02-05-simple-dgm.html", @@ -564,14 +571,14 @@ "href": "posts/towards-transformers.html", "title": "Towards transformers", "section": "", - "text": "Basic Imports\n\nimport tiktoken\n\n\nencoding = tiktoken.get_encoding(\"cl100k_base\")\n\n\nencoding.encode(\"Hello World! This is a simple notebook\")\n\n[9906, 4435, 0, 1115, 374, 264, 4382, 38266]\n\n\n\nencoding.decode([9906, 4435, 0, 1115])\n\n'Hello World! This'\n\n\n\nser = {}\nn =20\nfor i in range(n**2):\n ser[i] = encoding.decode([i])\n\n\nimport pandas as pd\n\n\npd.DataFrame(pd.Series(ser).values.reshape(n,n))\n\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n\n\n\n\n0\n!\n\"\n#\n$\n%\n&\n'\n(\n)\n*\n+\n,\n-\n.\n/\n0\n1\n2\n3\n4\n\n\n1\n5\n6\n7\n8\n9\n:\n;\n<\n=\n>\n?\n@\nA\nB\nC\nD\nE\nF\nG\nH\n\n\n2\nI\nJ\nK\nL\nM\nN\nO\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ\n[\n\\\n\n\n3\n]\n^\n_\n`\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\n\n\n4\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\n{\n|\n}\n~\n�\n�\n�\n�\n�\n�\n\n\n5\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n6\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n7\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n8\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n9\n�\n�\n�\n�\n�\n�\n�\n�\n\n\u0001\n\u0002\n\u0003\n\u0004\n\u0005\n\u0006\n\u0007\n\b\n\\t\n\\n\n\n\n\n10\n\n\\r\n\u000e\n\u000f\n\u0010\n\u0011\n\u0012\n\u0013\n\u0014\n\u0015\n\u0016\n\u0017\n\u0018\n\u0019\n\u001a\n\u001b\n\n\n\n\n\n\n11\n\n\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n12\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\nin\nt\n\n\n13\n\ner\n\non\na\nre\nat\nst\nen\nor\nth\n\\n\\n\nc\nle\ns\nit\nan\nar\nal\nthe\n\n\n14\n;\\n\np\nf\nou\n=\nis\n\ning\nes\nw\nion\ned\nic\nb\nd\net\nm\no\n\\t\\t\nro\n\n\n15\nas\nel\nct\nnd\nin\nh\nent\nid\nn\nam\n\nto\nre\n--\n{\nof\nom\n);\\n\nim\n\\r\\n\n\n\n16\n(\nil\n//\nand\nur\nse\nl\nex\nS\nad\n\"\nch\nut\nif\n**\n}\nem\nol\n\nth\n\n\n17\n)\\n\n{\\n\ng\nig\niv\n,\\n\nce\nod\nv\nate\nT\nag\nay\n*\not\nus\nC\nst\nI\nun\n\n\n18\nul\nue\nA\now\n'\new\n<\nation\n()\nfor\nab\nort\num\name\nis\npe\ntr\nck\n�\ny\n\n\n19\nist\n----\n.\\n\\n\nhe\ne\nlo\nM\nbe\ners\non\ncon\nap\nub\nP\n\nass\nint\n>\\n\nly\nurn" + "text": "Basic Imports\n\nimport tiktoken\n\n\nencoding = tiktoken.get_encoding(\"cl100k_base\")\n\n\nencoding.encode(\"Hello World! This is a simple notebook\")\n\n[9906, 4435, 0, 1115, 374, 264, 4382, 38266]\n\n\n\nencoding.decode([9906, 4435, 0, 1115])\n\n'Hello World! This'\n\n\n\nser = {}\nn =20\nfor i in range(n**2):\n ser[i] = encoding.decode([i])\n\n\nimport pandas as pd\n\n\npd.DataFrame(pd.Series(ser).values.reshape(n,n))\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n\n\n\n\n0\n!\n\"\n#\n$\n%\n&\n'\n(\n)\n*\n+\n,\n-\n.\n/\n0\n1\n2\n3\n4\n\n\n1\n5\n6\n7\n8\n9\n:\n;\n<\n=\n>\n?\n@\nA\nB\nC\nD\nE\nF\nG\nH\n\n\n2\nI\nJ\nK\nL\nM\nN\nO\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ\n[\n\\\n\n\n3\n]\n^\n_\n`\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\n\n\n4\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\n{\n|\n}\n~\n�\n�\n�\n�\n�\n�\n\n\n5\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n6\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n7\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n8\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n9\n�\n�\n�\n�\n�\n�\n�\n�\n\n\u0001\n\u0002\n\u0003\n\u0004\n\u0005\n\u0006\n\u0007\n\b\n\\t\n\\n\n\n\n\n10\n\n\\r\n\u000e\n\u000f\n\u0010\n\u0011\n\u0012\n\u0013\n\u0014\n\u0015\n\u0016\n\u0017\n\u0018\n\u0019\n\u001a\n\u001b\n\n\n\n\n\n\n11\n\n\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\n12\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n�\n\n\nin\nt\n\n\n13\n\ner\n\non\na\nre\nat\nst\nen\nor\nth\n\\n\\n\nc\nle\ns\nit\nan\nar\nal\nthe\n\n\n14\n;\\n\np\nf\nou\n=\nis\n\ning\nes\nw\nion\ned\nic\nb\nd\net\nm\no\n\\t\\t\nro\n\n\n15\nas\nel\nct\nnd\nin\nh\nent\nid\nn\nam\n\nto\nre\n--\n{\nof\nom\n);\\n\nim\n\\r\\n\n\n\n16\n(\nil\n//\nand\nur\nse\nl\nex\nS\nad\n\"\nch\nut\nif\n**\n}\nem\nol\n\nth\n\n\n17\n)\\n\n{\\n\ng\nig\niv\n,\\n\nce\nod\nv\nate\nT\nag\nay\n*\not\nus\nC\nst\nI\nun\n\n\n18\nul\nue\nA\now\n'\new\n<\nation\n()\nfor\nab\nort\num\name\nis\npe\ntr\nck\n�\ny\n\n\n19\nist\n----\n.\\n\\n\nhe\ne\nlo\nM\nbe\ners\non\ncon\nap\nub\nP\n\nass\nint\n>\\n\nly\nurn" }, { "objectID": "posts/autoencoder.html", "href": "posts/autoencoder.html", "title": "Autoencoders in JAX", "section": "", - "text": "Imports\n\nimport jax\nimport jax.numpy as jnp\nimport numpy as np\nimport optax\n\nimport matplotlib.pyplot as plt\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\nimport jax.random as random\nimport tensorflow_probability.substrates.jax as tfp\n\nfrom flax import linen as nn\nfrom typing import Any, Callable, Sequence\n\nimport seaborn as sns\nimport pandas as pd\n\nfrom bayes_opt import BayesianOptimization\n\n\n\n\nCreate a simple 2d dataset\n\nX = random.multivariate_normal(\n key=random.PRNGKey(0),\n shape=(100,),\n mean=jnp.array([1, 3]),\n cov=jnp.array([[1.0, -0.5], [-0.5, 2.0]]),\n)\n\n\nX.shape\n\n(100, 2)\n\n\n\nplt.scatter(X[:, 0], X[:, 1])\n# plt.gca().set_aspect(\"equal\")\n\n\n\n\n\n\n\n\n\nclass Encoder(nn.Module):\n bottleneck: int\n\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(5)(x)\n x = nn.selu(x)\n x = nn.Dense(features=self.bottleneck)(x)\n return x\n\n\nclass Decoder(nn.Module):\n out: int\n\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(5)(x)\n x = nn.selu(x)\n x = nn.Dense(features=self.out)(x)\n return x\n\n\nenc = Encoder(bottleneck=1)\n\ndec = Decoder(out=2)\n\n\nparams_enc = enc.init(random.PRNGKey(0), X)\nX_bottlenecked = enc.apply(params_enc, X)\nX_bottlenecked.shape\n\n(100, 1)\n\n\n\nprint(enc.tabulate(random.PRNGKey(0), X))\n\nprint(dec.tabulate(random.PRNGKey(0), X_bottlenecked))\n\n\n Encoder Summary \n┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ Encoder │ float32[100,2] │ float32[100,1] │ │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_0 │ Dense │ float32[100,2] │ float32[100,5] │ bias: float32[5] │\n│ │ │ │ │ kernel: float32[2,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 15 (60 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_1 │ Dense │ float32[100,5] │ float32[100,1] │ bias: float32[1] │\n│ │ │ │ │ kernel: float32[5,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 6 (24 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ │ │ │ Total │ 21 (84 B) │\n└─────────┴─────────┴────────────────┴────────────────┴──────────────────────┘\n \n Total Parameters: 21 (84 B) \n\n\n\n Decoder Summary \n┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ Decoder │ float32[100,1] │ float32[100,2] │ │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_0 │ Dense │ float32[100,1] │ float32[100,5] │ bias: float32[5] │\n│ │ │ │ │ kernel: float32[1,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 10 (40 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_1 │ Dense │ float32[100,5] │ float32[100,2] │ bias: float32[2] │\n│ │ │ │ │ kernel: float32[5,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 12 (48 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ │ │ │ Total │ 22 (88 B) │\n└─────────┴─────────┴────────────────┴────────────────┴──────────────────────┘\n \n Total Parameters: 22 (88 B) \n\n\n\n\n\nclass AE(nn.Module):\n bottleneck: int\n out: int\n def setup(self):\n # Alternative to @nn.compact -> explicitly define modules\n # Better for later when we want to access the encoder and decoder explicitly\n self.encoder = Encoder(bottleneck=self.bottleneck)\n self.decoder = Decoder(out=self.out)\n\n def __call__(self, x):\n\n z = self.encoder(x)\n x_hat = self.decoder(z)\n return x_hat\n\n\nbottleneck_size = 1\nout_size = X.shape[1]\nae = AE(bottleneck_size, out_size)\n\n\nae\n\nAE(\n # attributes\n bottleneck = 1\n out = 2\n)\n\n\n\nprint(ae.tabulate(random.PRNGKey(0), X))\n\n\n AE Summary \n┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ │ AE │ float32[100,2] │ float32[100,2] │ │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ encoder │ Encoder │ float32[100,2] │ float32[100,1] │ │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ encoder/Dense_0 │ Dense │ float32[100,2] │ float32[100,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[2,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 15 (60 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ encoder/Dense_1 │ Dense │ float32[100,5] │ float32[100,1] │ bias: │\n│ │ │ │ │ float32[1] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 6 (24 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ decoder │ Decoder │ float32[100,1] │ float32[100,2] │ │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ decoder/Dense_0 │ Dense │ float32[100,1] │ float32[100,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[1,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 10 (40 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ decoder/Dense_1 │ Dense │ float32[100,5] │ float32[100,2] │ bias: │\n│ │ │ │ │ float32[2] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 12 (48 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ │ │ │ Total │ 43 (172 B) │\n└─────────────────┴─────────┴────────────────┴────────────────┴────────────────┘\n \n Total Parameters: 43 (172 B) \n\n\n\n\n\nparams = ae.init(random.PRNGKey(0), X)\nparams\n\nFrozenDict({\n params: {\n encoder: {\n Dense_0: {\n kernel: DeviceArray([[ 0.17535934, -1.0953957 , 0.69273657, -0.26352578,\n 0.63077825],\n [ 0.36360174, -0.73782593, -0.5395247 , -0.41536337,\n -0.30090812]], dtype=float32),\n bias: DeviceArray([0., 0., 0., 0., 0.], dtype=float32),\n },\n Dense_1: {\n kernel: DeviceArray([[-0.64744544],\n [ 0.4855265 ],\n [-0.82133824],\n [ 0.62454295],\n [ 0.6013553 ]], dtype=float32),\n bias: DeviceArray([0.], dtype=float32),\n },\n },\n decoder: {\n Dense_0: {\n kernel: DeviceArray([[-0.5305567 , 1.1100855 , -0.31129056, 0.43152457,\n -0.09589562]], dtype=float32),\n bias: DeviceArray([0., 0., 0., 0., 0.], dtype=float32),\n },\n Dense_1: {\n kernel: DeviceArray([[-0.76956064, 0.13031492],\n [ 0.11736098, 0.47368795],\n [-0.12549445, -0.31066778],\n [-0.4392067 , -0.9067152 ],\n [-0.86761785, 0.42325035]], dtype=float32),\n bias: DeviceArray([0., 0.], dtype=float32),\n },\n },\n },\n})\n\n\n\nX_hat = ae.apply(params, X)\nX_hat.shape\n\n(100, 2)\n\n\n\ntry:\n ae.encoder\nexcept:\n pass\n # Trying to figure this out\n # https://github.com/google/flax/discussions/2602\n\n\n# Encoded values/latent representation\nencoded_1d = Encoder(1).apply({\"params\": params[\"params\"][\"encoder\"]}, X).flatten()\nencoded_1d\n\nDeviceArray([-2.4718695, -2.1964364, -2.6823573, -2.4936147, -1.7122931,\n -1.8346143, -2.0767107, -1.8570523, -1.7632042, -2.067935 ,\n -2.2317708, -2.14561 , -1.0023856, -2.1458383, -2.3645976,\n -1.9418356, -2.7020268, -1.6407721, -1.8281609, -2.2202983,\n -2.517499 , -2.5888596, -2.0095935, -2.4470625, -2.18571 ,\n -1.9742887, -1.8921608, -2.245328 , -0.8897901, -2.5329056,\n -2.2861118, -1.5862433, -2.2295656, -2.496296 , -2.404385 ,\n -2.0180435, -1.8416756, -1.858724 , -2.0980945, -1.777173 ,\n -2.0027544, -2.1870096, -2.44952 , -1.7563678, -1.5761943,\n -2.3097022, -2.0295165, -2.9528203, -2.2042174, -1.9090188,\n -1.8868417, -2.4206855, -2.143362 , -1.880422 , -2.5127397,\n -2.1454868, -2.0043788, -2.570388 , -2.5082102, -2.3339696,\n -1.8621875, -2.4201612, -2.561397 , -2.0498512, -1.6772006,\n -1.6392376, -2.3855271, -1.8138398, -3.3776197, -2.3745804,\n -2.6683671, -1.8609927, -1.4205931, -1.8123009, -2.236284 ,\n -2.2161927, -2.5204146, -2.0504622, -2.1548996, -1.6896895,\n -1.3192847, -2.2909331, -2.1295016, -2.0703764, -1.9394028,\n -2.041992 , -1.8279521, -1.690125 , -2.7230937, -2.3157165,\n -1.7527001, -2.2544892, -2.6310122, -2.0703619, -2.2476096,\n -1.8941168, -1.5398859, -1.5742403, -2.375471 , -1.9361446], dtype=float32)\n\n\n\ndef plot_2d_reconstruction(X, params, model, trained = False):\n X_hat = model.apply(params, X)\n plt.scatter(X[:, 0], X[:, 1], label=\"Original Data\")\n plt.scatter(X_hat[:, 0], X_hat[:, 1], label=\"Reconstructed Data\")\n if trained:\n plt.title(\"Trained\")\n else:\n plt.title(\"Untrained\")\n\n\nplot_2d_reconstruction(X, params, ae, False)\n\n\n\n\n\n\n\n\n\n\nDefine the Loss function\n\\(\\ell_2\\) penalty\n\ndiff = X - X_hat\n\n\ndiff.shape\n\n(100, 2)\n\n\n\ndiff[:5]\n\nDeviceArray([[-0.46981597, 5.271835 ],\n [ 1.6502905 , 3.6781619 ],\n [ 1.8507848 , 5.0589485 ],\n [ 2.8690844 , 4.5646677 ],\n [ 0.4905889 , 2.8893166 ]], dtype=float32)\n\n\n\n(diff**2).sum(axis=1).mean() / 2\n\nDeviceArray(7.9555416, dtype=float32)\n\n\n\n(diff**2).sum(axis=1)[:5]\n\nDeviceArray([28.01297 , 16.252333, 29.018364, 29.067837, 8.588828], dtype=float32)\n\n\n\n(jnp.linalg.norm(diff, ord=2, axis=1) ** 2).mean() / 2\n\nDeviceArray(7.955541, dtype=float32)\n\n\n\nfrom sklearn.metrics import mean_squared_error\n\n\nmean_squared_error(X, X_hat)\n\n7.9555407\n\n\n\nprint(2 * optax.l2_loss(X_hat, X).mean())\n\n\"\"\"\n\nMultplying by two\nDocstring says:\nCalculates the L2 loss for a set of predictions.\n\nNote: the 0.5 term is standard in \"Pattern Recognition and Machine Learning\"\nby Bishop, but not \"The Elements of Statistical Learning\" by Tibshirani.\n\"\"\"\n\n7.9555416\n\n\n'\\n\\nMultplying by two\\nDocstring says:\\nCalculates the L2 loss for a set of predictions.\\n\\nNote: the 0.5 term is standard in \"Pattern Recognition and Machine Learning\"\\nby Bishop, but not \"The Elements of Statistical Learning\" by Tibshirani.\\n'\n\n\n\n@jax.jit\ndef loss(params, X):\n X_hat = ae.apply(params, X)\n return 2 * optax.l2_loss(X_hat, X).mean()\n\n\nloss(params, X)\n\nDeviceArray(7.9555416, dtype=float32)\n\n\n\n\nDefining the train function\n\ndef train(\n X: jnp.array,\n optimizer: optax._src.base.GradientTransformation,\n model: nn.Module,\n key_param: jax.random.PRNGKey,\n n_iter: int=500,\n print_every: int=10\n):\n loss_array = np.zeros(n_iter)\n def loss(params, X):\n X_hat = model.apply(params, X)\n return 2 * optax.l2_loss(X_hat, X).mean()\n\n params = model.init(key_param, X)\n opt_state = optimizer.init(params)\n loss_grad_fn = jax.value_and_grad(loss)\n\n for i in range(n_iter):\n loss_val, grads = loss_grad_fn(params, X)\n loss_array[i] = loss_val.item()\n updates, opt_state = optimizer.update(grads, opt_state)\n params = optax.apply_updates(params, updates)\n if i % print_every == 0:\n print(\"Loss step {}: \".format(i), loss_val)\n return params, loss_array\n\n\noptimized_params, loss_array = train(\n X, optax.adam(learning_rate=0.1), ae, jax.random.PRNGKey(0), n_iter=30\n)\n\nLoss step 0: 7.9555416\nLoss step 10: 1.3104575\nLoss step 20: 0.544944\n\n\n\nplt.plot(loss_array)\nplt.xlabel(\"Iterations\")\n_ = plt.ylabel(\"Reconstruction loss\")\n\n\n\n\n\n\n\n\n\nplot_2d_reconstruction(X, optimized_params, ae, True)\n\n\n\n\n\n\n\n\n\nfrom sklearn import datasets\n\n\ndigits = datasets.load_digits()\n\n\nX = jnp.array(digits[\"data\"])\ny = digits[\"target\"]\n\n\nX.shape\n\n(1797, 64)\n\n\n\nplt.imshow(X[1].reshape(8, 8), cmap=\"Greys\")\ny[1]\n\n1\n\n\n\n\n\n\n\n\n\n\nbn = 2\nae_digits = AE(bn, X.shape[1])\n\nae_digits\n\nAE(\n # attributes\n bottleneck = 2\n out = 64\n)\n\n\n\nprint(ae_digits.tabulate(random.PRNGKey(0), X))\n\n\n AE Summary \n┏━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ │ AE │ float32[1797,… │ float32[1797,6… │ │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ encoder │ Encoder │ float32[1797,… │ float32[1797,2] │ │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ encoder/Dense… │ Dense │ float32[1797,… │ float32[1797,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[64,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 325 (1.3 KB) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ encoder/Dense… │ Dense │ float32[1797,… │ float32[1797,2] │ bias: │\n│ │ │ │ │ float32[2] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 12 (48 B) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ decoder │ Decoder │ float32[1797,… │ float32[1797,6… │ │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ decoder/Dense… │ Dense │ float32[1797,… │ float32[1797,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[2,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 15 (60 B) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ decoder/Dense… │ Dense │ float32[1797,… │ float32[1797,6… │ bias: │\n│ │ │ │ │ float32[64] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,64] │\n│ │ │ │ │ │\n│ │ │ │ │ 384 (1.5 KB) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ │ │ │ Total │ 736 (2.9 KB) │\n└────────────────┴─────────┴────────────────┴─────────────────┴────────────────┘\n \n Total Parameters: 736 (2.9 KB) \n\n\n\n\n\nparams_digits = ae_digits.init(random.PRNGKey(0), X)\n\n\njax.tree_util.tree_map(lambda x: x.shape, params_digits)\n\nFrozenDict({\n params: {\n decoder: {\n Dense_0: {\n bias: (5,),\n kernel: (2, 5),\n },\n Dense_1: {\n bias: (64,),\n kernel: (5, 64),\n },\n },\n encoder: {\n Dense_0: {\n bias: (5,),\n kernel: (64, 5),\n },\n Dense_1: {\n bias: (2,),\n kernel: (5, 2),\n },\n },\n },\n})\n\n\n\ndef plot_encoding_2dim(encoder, params):\n assert encoder.bottleneck >= 2\n X_low = encoder.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n df = pd.DataFrame(X_low)\n df[\"label\"] = y\n sns.pairplot(df, hue=\"label\", palette=\"bright\")\n\n\n\nUntrained encodings\n\nplot_encoding_2dim(Encoder(bottleneck=bn), params_digits)\n\n\n\n\n\n\n\n\n\nX_recon = ae_digits.apply(params_digits, X)\n\n\ndef plot_orig_recon(index=0):\n fig, ax = plt.subplots(sharex=True, ncols=2)\n ax[0].imshow(X[index].reshape(8, 8), cmap=\"Greys\")\n ax[1].imshow(X_recon[index].reshape(8, 8), cmap=\"Greys\")\n ax[0].set_title(\"Original\")\n ax[1].set_title(\"Reconstructed\")\n\n\nplot_orig_recon(5)\n\n\n\n\n\n\n\n\n\noptimized_params_digits, loss_array_digits = train(\n X, optax.adam(learning_rate=0.01), ae_digits, jax.random.PRNGKey(0), n_iter=1000\n)\n\nLoss step 0: 90.91908\nLoss step 10: 62.609577\nLoss step 20: 58.390884\nLoss step 30: 53.54514\nLoss step 40: 45.062607\nLoss step 50: 33.541103\nLoss step 60: 25.167671\nLoss step 70: 21.107908\nLoss step 80: 19.424128\nLoss step 90: 18.734087\nLoss step 100: 18.47802\nLoss step 110: 18.390646\nLoss step 120: 18.352455\nLoss step 130: 18.333141\nLoss step 140: 18.321236\nLoss step 150: 18.311743\nLoss step 160: 18.3032\nLoss step 170: 18.295115\nLoss step 180: 18.287226\nLoss step 190: 18.279234\nLoss step 200: 18.270723\nLoss step 210: 18.26098\nLoss step 220: 18.2499\nLoss step 230: 18.237106\nLoss step 240: 18.221647\nLoss step 250: 18.20243\nLoss step 260: 18.177717\nLoss step 270: 18.14539\nLoss step 280: 18.105865\nLoss step 290: 18.058249\nLoss step 300: 18.000141\nLoss step 310: 17.931208\nLoss step 320: 17.84967\nLoss step 330: 17.755304\nLoss step 340: 17.65073\nLoss step 350: 17.537819\nLoss step 360: 17.418528\nLoss step 370: 17.293976\nLoss step 380: 17.164043\nLoss step 390: 17.029558\nLoss step 400: 16.89464\nLoss step 410: 16.760334\nLoss step 420: 16.626553\nLoss step 430: 16.493797\nLoss step 440: 16.362513\nLoss step 450: 16.234201\nLoss step 460: 16.11052\nLoss step 470: 15.992949\nLoss step 480: 15.883502\nLoss step 490: 15.783846\nLoss step 500: 15.694724\nLoss step 510: 15.615571\nLoss step 520: 15.54589\nLoss step 530: 15.483993\nLoss step 540: 15.427973\nLoss step 550: 15.376085\nLoss step 560: 15.326871\nLoss step 570: 15.280196\nLoss step 580: 15.23521\nLoss step 590: 15.191253\nLoss step 600: 15.149132\nLoss step 610: 15.109302\nLoss step 620: 15.071858\nLoss step 630: 15.037474\nLoss step 640: 15.005837\nLoss step 650: 14.977009\nLoss step 660: 14.950782\nLoss step 670: 14.927103\nLoss step 680: 14.905551\nLoss step 690: 14.885867\nLoss step 700: 14.867877\nLoss step 710: 14.851396\nLoss step 720: 14.836317\nLoss step 730: 14.8224125\nLoss step 740: 14.809575\nLoss step 750: 14.797547\nLoss step 760: 14.786259\nLoss step 770: 14.775562\nLoss step 780: 14.76545\nLoss step 790: 14.755904\nLoss step 800: 14.746771\nLoss step 810: 14.738021\nLoss step 820: 14.729595\nLoss step 830: 14.721415\nLoss step 840: 14.713423\nLoss step 850: 14.705618\nLoss step 860: 14.697898\nLoss step 870: 14.690201\nLoss step 880: 14.682494\nLoss step 890: 14.674812\nLoss step 900: 14.667133\nLoss step 910: 14.6593275\nLoss step 920: 14.651322\nLoss step 930: 14.643042\nLoss step 940: 14.634569\nLoss step 950: 14.625735\nLoss step 960: 14.616413\nLoss step 970: 14.6066065\nLoss step 980: 14.596094\nLoss step 990: 14.58464\n\n\n\nplt.plot(loss_array_digits)\n\n\n\n\n\n\n\n\n\n\nTrained encodings\n\nplot_encoding_2dim(Encoder(bottleneck=bn), optimized_params_digits)\n\n\n\n\n\n\n\n\n\n\nReconstruction\n\nX_recon = ae_digits.apply(optimized_params_digits, X)\nplot_orig_recon(4)\n\n\n\n\n\n\n\n\n\nX_reconstructed = ae.apply(params, X)\n\n\nerrs = jnp.square(X - X_reconstructed).sum(axis=1)\nerr_df = pd.DataFrame({\"error\": errs, \"label\": y})\nerr_df.groupby(\"label\").mean()\n\n\n\n\n\n\n\n\n\nerror\n\n\nlabel\n\n\n\n\n\n0\n1067.159668\n\n\n1\n1253.397217\n\n\n2\n1187.446655\n\n\n3\n730.839417\n\n\n4\n919.732239\n\n\n5\n1103.442505\n\n\n6\n913.172607\n\n\n7\n1309.424438\n\n\n8\n892.981750\n\n\n9\n891.891907\n\n\n\n\n\n\n\n\n\nerr_df = pd.DataFrame({\"error\": errs, \"label\": y})\n\n\nerr_df.groupby(\"label\").mean()\n\n\n\n\n\n\n\n\n\nerror\n\n\nlabel\n\n\n\n\n\n0\n1067.159668\n\n\n1\n1253.397217\n\n\n2\n1187.446655\n\n\n3\n730.839417\n\n\n4\n919.732239\n\n\n5\n1103.442505\n\n\n6\n913.172607\n\n\n7\n1309.424438\n\n\n8\n892.981750\n\n\n9\n891.891907\n\n\n\n\n\n\n\n\n\n\nConvoluational AE\n\nclass ConvEncoder(nn.Module):\n bottleneck: int\n\n @nn.compact\n def __call__(self, x):\n n = x.shape[0] # x is nx64\n x = x.reshape(n, 8, 8, 1)\n x = nn.Conv(features=4, kernel_size=(2, 2), strides=1, padding=0)(\n x\n ) # 8X8X1 -> 6x6X4\n x = nn.selu(x)\n x = nn.max_pool(x, window_shape=(2, 2), strides=(2, 2)) # 6x6x4 --> 3x3x4\n x = nn.selu(x)\n x = x.reshape(n, -1) # N X 3x3x4 -> N X 36\n x = nn.Dense(self.bottleneck)(x)\n return x\n\n\nce = ConvEncoder(2)\n#print(ce.tabulate(random.PRNGKey(0), X))\nprint(ce.tabulate(random.PRNGKey(0), X, console_kwargs={\"width\": 120}))\n\n\n ConvEncoder Summary \n┏━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ ConvEncoder │ float32[1797,64] │ float32[1797,2] │ │\n├─────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Conv_0 │ Conv │ float32[1797,8,8,1] │ float32[1797,7,7,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,1,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 20 (80 B) │\n├─────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Dense_0 │ Dense │ float32[1797,36] │ float32[1797,2] │ bias: float32[2] │\n│ │ │ │ │ kernel: float32[36,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 74 (296 B) │\n├─────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ │ │ │ Total │ 94 (376 B) │\n└─────────┴─────────────┴─────────────────────┴─────────────────────┴──────────────────────────┘\n \n Total Parameters: 94 (376 B) \n\n\n\n\n\nclass ConvDecoder(nn.Module):\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(36)(x) # Nx2 --> Nx36\n x = nn.selu(x)\n x = x.reshape(-1, 3, 3, 4) # NX3X3X4\n x = nn.ConvTranspose(features=4, kernel_size=(2, 2), strides=(2, 2))(\n x\n ) # 3x3x4 -> 6x6X4\n x = nn.selu(x)\n x = nn.Conv(features=1, kernel_size=(1, 1), strides=1, padding=1)(\n x\n ) # 6x6x4 -> 8x8x1\n x = x.reshape(-1, 64)\n return x\n\n\ncd = ConvDecoder()\nprint(\n cd.tabulate(\n random.PRNGKey(0),\n jax.random.normal(key=jax.random.PRNGKey(0), shape=(1797, 2)),\n console_kwargs={\"width\": 120},\n )\n)\n\n\n ConvDecoder Summary \n┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ ConvDecoder │ float32[1797,2] │ float32[1797,64] │ │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Dense_0 │ Dense │ float32[1797,2] │ float32[1797,36] │ bias: float32[36] │\n│ │ │ │ │ kernel: float32[2,36] │\n│ │ │ │ │ │\n│ │ │ │ │ 108 (432 B) │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ ConvTranspose_0 │ ConvTranspose │ float32[1797,3,3,4] │ float32[1797,6,6,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,4,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 68 (272 B) │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Conv_0 │ Conv │ float32[1797,6,6,4] │ float32[1797,8,8,1] │ bias: float32[1] │\n│ │ │ │ │ kernel: float32[1,1,4,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 5 (20 B) │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ │ │ │ Total │ 181 (724 B) │\n└─────────────────┴───────────────┴─────────────────────┴─────────────────────┴──────────────────────────┘\n \n Total Parameters: 181 (724 B) \n\n\n\n\n\nclass ConvAE(nn.Module):\n bottleneck: int\n\n def setup(self):\n # Alternative to @nn.compact -> explicitly define modules\n # Better for later when we want to access the encoder and decoder explicitly\n self.encoder = ConvEncoder(bottleneck=self.bottleneck)\n self.decoder = ConvDecoder()\n\n def __call__(self, x):\n\n z = self.encoder(x)\n x_hat = self.decoder(z)\n return x_hat\n\n\ncae = ConvAE(2)\nprint(\n cae.tabulate(\n random.PRNGKey(0),\n X,\n console_kwargs={\"width\": 120},\n )\n)\n\n\n ConvAE Summary \n┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ ConvAE │ float32[1797,64] │ float32[1797,64] │ │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ encoder │ ConvEncoder │ float32[1797,64] │ float32[1797,2] │ │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ encoder/Conv_0 │ Conv │ float32[1797,8,8,1] │ float32[1797,7,7,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,1,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 20 (80 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ encoder/Dense_0 │ Dense │ float32[1797,36] │ float32[1797,2] │ bias: float32[2] │\n│ │ │ │ │ kernel: float32[36,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 74 (296 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder │ ConvDecoder │ float32[1797,2] │ float32[1797,64] │ │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder/Dense_0 │ Dense │ float32[1797,2] │ float32[1797,36] │ bias: float32[36] │\n│ │ │ │ │ kernel: float32[2,36] │\n│ │ │ │ │ │\n│ │ │ │ │ 108 (432 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder/ConvTranspose_0 │ ConvTranspose │ float32[1797,3,3,4] │ float32[1797,6,6,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,4,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 68 (272 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder/Conv_0 │ Conv │ float32[1797,6,6,4] │ float32[1797,8,8,1] │ bias: float32[1] │\n│ │ │ │ │ kernel: float32[1,1,4,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 5 (20 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ │ │ │ Total │ 275 (1.1 KB) │\n└─────────────────────────┴───────────────┴─────────────────────┴─────────────────────┴──────────────────────────┘\n \n Total Parameters: 275 (1.1 KB) \n\n\n\n\n\nparams = cae.init(random.PRNGKey(0), X)\n\n\nplot_encoding_2dim(ConvEncoder(bottleneck=2), params)\n\n\n\n\n\n\n\n\n\noptimized_params_digits_cae, loss_array_digits_cae = train(\n X, optax.adam(learning_rate=0.01), cae, jax.random.PRNGKey(0), n_iter=1000, print_every=50\n)\n\nLoss step 0: 61.916904\nLoss step 50: 30.379993\nLoss step 100: 27.855324\nLoss step 150: 26.851124\nLoss step 200: 25.77603\nLoss step 250: 25.184359\nLoss step 300: 24.772747\nLoss step 350: 24.351847\nLoss step 400: 24.091908\nLoss step 450: 23.887573\nLoss step 500: 23.72832\nLoss step 550: 23.607725\nLoss step 600: 23.514961\nLoss step 650: 23.419945\nLoss step 700: 23.363184\nLoss step 750: 23.30127\nLoss step 800: 23.258532\nLoss step 850: 23.206999\nLoss step 900: 23.162285\nLoss step 950: 23.13027\n\n\n\nplot_encoding_2dim(ConvEncoder(bottleneck=2), optimized_params_digits_cae)\n\n\n\n\n\n\n\n\n\n\nBayesOpt for optimizing the latent dimension\n\ndef black_box_function(x, y):\n \"\"\"Function with unknown internals we wish to maximize.\n\n This is just serving as an example, for all intents and\n purposes think of the internals of this function, i.e.: the process\n which generates its output values, as unknown.\n \"\"\"\n x = int(x)\n y = int(y)\n return function_discrete(x, y)\n\n\ndef function_discrete(x, y):\n assert type(x) ==int\n return -(x**2) - (y - 1) ** 2 + 1\n\n\npbounds = {\"x\": (2, 4), \"y\": (-3, 3)}\n\n\noptimizer = BayesianOptimization(\n f=black_box_function,\n pbounds=pbounds,\n verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent\n random_state=1,\n)\n\n\noptimizer.maximize()\n\n| iter | target | x | y |\n-------------------------------------------------\n| 1 | -3.0 | 2.834 | 1.322 |\n| 2 | -7.0 | 2.0 | -1.186 |\n| 3 | -12.0 | 2.294 | -2.446 |\n| 4 | -4.0 | 2.373 | -0.9266 |\n| 5 | -4.0 | 2.794 | 0.2329 |\n| 6 | -15.0 | 4.0 | 1.331 |\n| 7 | -4.0 | 2.348 | 0.8879 |\n| 8 | -3.0 | 2.797 | 1.257 |\n| 9 | -4.0 | 2.064 | 2.229 |\n| 10 | -9.0 | 3.657 | -0.9428 |\n| 11 | -7.0 | 2.901 | 3.0 |\n| 12 | -4.0 | 2.0 | -0.1486 |\n| 13 | -31.0 | 4.0 | -3.0 |\n| 14 | -7.0 | 2.0 | 3.0 |\n| 15 | -3.0 | 2.0 | 1.539 |\n| 16 | -3.0 | 2.512 | 1.792 |\n| 17 | -19.0 | 4.0 | 3.0 |\n| 18 | -4.0 | 2.831 | -0.4655 |\n| 19 | -4.0 | 2.402 | -0.3286 |\n| 20 | -9.0 | 3.539 | 0.08748 |\n| 21 | -7.0 | 2.841 | -1.217 |\n| 22 | -4.0 | 2.764 | 2.245 |\n| 23 | -4.0 | 2.0 | 0.4436 |\n| 24 | -3.0 | 2.469 | 1.423 |\n| 25 | -3.0 | 2.0 | 1.16 |\n| 26 | -3.0 | 2.787 | 1.714 |\n| 27 | -4.0 | 2.932 | 0.7853 |\n| 28 | -3.0 | 2.647 | 1.526 |\n| 29 | -3.0 | 2.148 | 1.373 |\n| 30 | -3.0 | 2.212 | 1.795 |\n=================================================\n\n\n\noptimizer.max\n\n{'target': -3.0, 'params': {'x': 2.8340440094051482, 'y': 1.3219469606529488}}\n\n\n\n{k: int(v) for k, v in optimizer.max[\"params\"].items()}\n\n{'x': 2, 'y': 1}\n\n\n\nfunction_discrete(2, 1)\n\n-3\n\n\nLet us keep a separate validation set\n\ndef loss_model(params, X, model):\n X_hat = model.apply(params, X)\n diff = X - X_hat\n return (diff**2).sum(axis=1).mean() / X.shape[1]\n\n\nfrom functools import partial\n\ne = partial(loss_model, model=cae)\ne(params, X)\n\nDeviceArray(61.916904, dtype=float32)\n\n\n\ndef validation_loss_discrete(bn):\n assert type(bn) == int\n\n # Train the model on bn sized bottleneck\n cae = ConvAE(bn)\n loss_fn_concrete = jax.jit(partial(loss_model, model=cae))\n loss_grad_fn = jax.value_and_grad(loss_fn_concrete)\n tx = optax.adam(learning_rate=1e-2)\n params = cae.init(random.PRNGKey(0), X_train)\n opt_state = tx.init(params)\n print(f\"--------Bottleneck of Size: {bn}-------------\")\n for i in range(30):\n loss_val, grads = loss_grad_fn(params, X_train)\n updates, opt_state = tx.update(grads, opt_state)\n params = optax.apply_updates(params, updates)\n\n if i % 5 == 0:\n print(\"Loss step {}: \".format(i), loss_val)\n print(f\"--------End-------------\")\n\n # Evaluate on validation dataset\n return loss_fn_concrete(params, X_validation)\n\n\nX_train, X_validation = X[:1000], X[1000:]\n\n\nvalidation_loss_discrete(2)\n\n--------Bottleneck of Size: 2-------------\nLoss step 0: 62.27715\nLoss step 5: 58.5037\nLoss step 10: 53.984245\nLoss step 15: 49.513382\nLoss step 20: 43.078316\nLoss step 25: 38.30596\n--------End-------------\n\n\nDeviceArray(36.75615, dtype=float32)\n\n\n\ndef validation_loss_bb(bn):\n bn_int = int(bn)\n return -validation_loss_discrete(bn_int)\n\n\nvalidation_loss_bb(2.5)\n\n--------Bottleneck of Size: 2-------------\nLoss step 0: 62.27715\nLoss step 5: 58.5037\nLoss step 10: 53.984245\nLoss step 15: 49.513382\nLoss step 20: 43.078316\nLoss step 25: 38.30596\n--------End-------------\n\n\nDeviceArray(-36.75615, dtype=float32)\n\n\n\npbounds = {\"bn\": (1, 40)}\noptimizer = BayesianOptimization(\n f=validation_loss_bb,\n pbounds=pbounds,\n verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent\n random_state=1,\n)\n\n\noptimizer.maximize(n_iter=8)\n\n| iter | target | bn |\n-------------------------------------\n--------Bottleneck of Size: 17-------------\nLoss step 0: 62.85297\nLoss step 5: 52.85449\nLoss step 10: 40.903214\nLoss step 15: 35.32036\nLoss step 20: 35.3193\nLoss step 25: 33.33418\n--------End-------------\n| 1 | -32.36 | 17.26 |\n--------Bottleneck of Size: 29-------------\nLoss step 0: 64.064514\nLoss step 5: 53.85875\nLoss step 10: 47.26749\nLoss step 15: 43.828564\nLoss step 20: 41.847286\nLoss step 25: 39.23966\n--------End-------------\n| 2 | -37.29 | 29.09 |\n--------Bottleneck of Size: 1-------------\nLoss step 0: 60.969757\nLoss step 5: 58.92785\nLoss step 10: 53.683678\nLoss step 15: 49.58035\nLoss step 20: 45.86102\nLoss step 25: 44.17104\n--------End-------------\n| 3 | -42.48 | 1.004 |\n--------Bottleneck of Size: 12-------------\nLoss step 0: 63.704227\nLoss step 5: 57.338806\nLoss step 10: 49.537926\nLoss step 15: 41.210827\nLoss step 20: 38.469257\nLoss step 25: 35.276833\n--------End-------------\n| 4 | -34.07 | 12.79 |\n--------Bottleneck of Size: 6-------------\nLoss step 0: 61.450924\nLoss step 5: 55.82548\nLoss step 10: 47.88899\nLoss step 15: 40.131763\nLoss step 20: 37.62544\nLoss step 25: 35.873016\n--------End-------------\n| 5 | -34.2 | 6.723 |\n--------Bottleneck of Size: 20-------------\nLoss step 0: 61.81845\nLoss step 5: 56.358246\nLoss step 10: 51.92751\nLoss step 15: 47.312576\nLoss step 20: 42.146885\nLoss step 25: 37.025486\n--------End-------------\n| 6 | -33.86 | 20.39 |\n--------Bottleneck of Size: 40-------------\nLoss step 0: 61.5667\nLoss step 5: 49.598972\nLoss step 10: 42.639145\nLoss step 15: 39.22532\nLoss step 20: 36.597954\nLoss step 25: 34.528015\n--------End-------------\n| 7 | -32.67 | 40.0 |\n--------Bottleneck of Size: 36-------------\nLoss step 0: 62.303535\nLoss step 5: 52.075367\nLoss step 10: 44.435425\nLoss step 15: 40.889286\nLoss step 20: 39.280178\nLoss step 25: 37.09512\n--------End-------------\n| 8 | -35.77 | 36.05 |\n--------Bottleneck of Size: 9-------------\nLoss step 0: 63.35566\nLoss step 5: 52.45499\nLoss step 10: 43.281902\nLoss step 15: 37.028984\nLoss step 20: 35.006325\nLoss step 25: 33.583298\n--------End-------------\n| 9 | -33.01 | 9.596 |\n--------Bottleneck of Size: 24-------------\nLoss step 0: 62.888515\nLoss step 5: 52.035835\nLoss step 10: 42.154068\nLoss step 15: 36.804348\nLoss step 20: 34.53549\nLoss step 25: 32.37921\n--------End-------------\n| 10 | -30.08 | 24.26 |\n--------Bottleneck of Size: 25-------------\nLoss step 0: 63.406757\nLoss step 5: 50.291225\nLoss step 10: 41.73214\nLoss step 15: 38.421593\nLoss step 20: 37.0491\nLoss step 25: 34.847046\n--------End-------------\n| 11 | -33.89 | 25.81 |\n--------Bottleneck of Size: 22-------------\nLoss step 0: 62.303898\nLoss step 5: 53.713398\nLoss step 10: 47.806355\nLoss step 15: 43.550034\nLoss step 20: 42.033653\nLoss step 25: 39.68766\n--------End-------------\n| 12 | -38.51 | 22.8 |\n--------Bottleneck of Size: 24-------------\nLoss step 0: 62.888515\nLoss step 5: 52.035835\nLoss step 10: 42.154068\nLoss step 15: 36.804348\nLoss step 20: 34.53549\nLoss step 25: 32.37921\n--------End-------------\n| 13 | -30.08 | 24.3 |\n=====================================\n\n\n\noptimizer.max\n\n{'target': -30.082199096679688, 'params': {'bn': 24.25939633195359}}\n\n\n\n\nVAE\n\nclass VAE_Encoder(nn.Module):\n bottleneck: int\n\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(5)(x)\n x = nn.selu(x)\n mu = nn.Dense(features=self.bottleneck)(x)\n log_std = nn.Dense(features=self.bottleneck)(x)\n return mu, log_std\n\n\ndef reparameterize(mu, log_std, key=random.PRNGKey(0), samples=1):\n std = jnp.exp(log_std)\n eps = random.normal(key=key, shape=(samples,))\n return mu + eps * std\n\n\nsamples = reparameterize(2, jnp.log(1), samples=5000)\nsns.kdeplot(samples)\nplt.title(f\"Mean:{jnp.mean(samples):0.2f}, stddev: {jnp.std(samples):0.2f}\")\n\nText(0.5, 1.0, 'Mean:2.00, stddev: 1.00')\n\n\n\n\n\n\n\n\n\n\nclass VAE(nn.Module):\n bottleneck: int\n out: int\n\n def setup(self):\n # Alternative to @nn.compact -> explicitly define modules\n # Better for later when we want to access the encoder and decoder explicitly\n self.encoder = VAE_Encoder(bottleneck=self.bottleneck)\n self.decoder = Decoder(out=self.out)\n\n def __call__(self, x, rng=random.PRNGKey(0)):\n mu, log_std = self.encoder(x)\n z = reparameterize(mu, log_std, key=rng)\n x_hat = self.decoder(z)\n return x_hat, mu, log_std\n\n\nvae = VAE(bottleneck=2, out=64)\n\n\nparams = vae.init(random.PRNGKey(10), X)\n\n\nplt.imshow(vae.apply(params, X)[0][0].reshape(8, 8))\n\n\n\n\n\n\n\n\n\nvae.apply(params, X, random.PRNGKey(10))[0][0].reshape(8, 8)\n\nDeviceArray([[ -3999.399 , 6091.6396 , -2634.2932 , 307.47302 ,\n 3932.0298 , 1823.3352 , 3852.157 , 5576.5605 ],\n [ -8809.304 , 5299.91 , 286.5227 , 1059.3925 ,\n -951.62537 , -6623.4824 , -1463.6239 , 16223.624 ],\n [ -5279.1323 , -7333.815 , -71.1485 , 5679.2773 ,\n 1384.2794 , 8326.92 , -1747.943 , -4802.341 ],\n [ 403.3739 , 13455.688 , -7414.195 , 7299.713 ,\n 1180.7408 , -328.49432 , 6619.1357 , 363.74713 ],\n [ -4376.3506 , -2045.3063 , 2618.412 , -10890.402 ,\n -3035.3848 , -3574.7527 , -5057.2593 , -1859.8529 ],\n [ -53.99241 , 2318.109 , -1323.9087 , -6801.4814 ,\n -7300.1553 , 865.4169 , 13349.937 , 865.3773 ],\n [ 37.275284, -3962.8357 , 1771.9886 , -7992.7188 ,\n 4896.562 , -17371.383 , 4737.3887 , 7307.3384 ],\n [ -221.0234 , -5475.8447 , 4189.172 , -1095.9471 ,\n -6452.915 , 3767.8381 , -10514.758 , -2311.0862 ]], dtype=float32)\n\n\n\nvae_e = VAE_Encoder(2)\nmu, log_sigma = vae_e.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n\n\ntfd = tfp.distributions\n\n\nq\n\nNameError: name 'q' is not defined\n\n\n\ntfd.kl_divergence(q, p).shape\n\n\ntfd.kl_divergence(q, p).mean()\n\n\nq.stddev()\n\n\n\nLoss\n\n@jax.jit\ndef loss_vae(params, X, rng=random.PRNGKey(0)):\n X_hat, mu, log_sigma = vae.apply(params, X, rng)\n q = tfd.Normal(loc=mu, scale=jnp.exp(log_sigma))\n p = tfd.Normal(loc=0.0, scale=1.0)\n kl_loss = tfd.kl_divergence(q, p).mean()\n\n diff = X - X_hat\n recon_loss = (diff**2).sum(axis=1).mean() / X.shape[1]\n\n return recon_loss + 0.0020 * kl_loss\n\n\nloss_vae(params, X, random.PRNGKey(4))\n\n\nimport optax\n\nlearning_rate = 0.01\ntx = optax.adam(learning_rate=learning_rate)\nopt_state = tx.init(params)\nloss_grad_fn = jax.value_and_grad(loss_vae)\n\n\nfor i in range(2001):\n rng, key = random.split(rng)\n loss_val, grads = loss_grad_fn(params, X, rng)\n updates, opt_state = tx.update(grads, opt_state)\n params = optax.apply_updates(params, updates)\n if i % 50 == 0:\n print(\"Loss step {}: \".format(i), loss_val)\n\n\nX_recon, _, _ = vae.apply(params, X)\n\n\nplot_orig_recon(8)\n\n\ndec = Decoder(out=64)\nN = 10\nx_range = jnp.linspace(-2, 2, N)\nfig, ax = plt.subplots(ncols=N, sharey=True, figsize=(20, 4))\nfor i in range(N):\n ax[i].imshow(\n dec.apply(\n {\"params\": params[\"params\"][\"decoder\"]}, jnp.array([x_range[i], 0.0])\n ).reshape(8, 8),\n cmap=\"Greys\",\n )\n\n\ndef plot_encoding_2dim_vae(encoder, params):\n assert encoder.bottleneck >= 2\n mu, log_sigma = encoder.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n df = pd.DataFrame(mu)\n df[\"label\"] = y\n sns.pairplot(df, hue=\"label\", palette=\"bright\")\n\n\nvae_enc = VAE_Encoder(2)\nmu, log_sigma = vae_enc.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n# plot_encoding_2dim_vae(VAE_Encoder(2), params)\n\n\nplot_encoding_2dim_vae(vae_enc, params)\n\n\n\nTODO\n\nregular AE: Bayesopt for latent dimension\ngeneration from regular AE\ngraph of reconstruction loss v/s latent dimension for regular AE\nGIF for walking in latent space for VAE\nReconstruction as a factor of Recon + Beta X KL\nGet the Encoder from AE object directly\nImpact of MC samples\nReconstruction v/s Expected Log Likelihood (confirm the trend is same for both)\nCleanup code so that can be reused rather than copy pasting\nSparse VAE\nAdd references\nAdd bib entry\nConsider CNNs for more realistic datasets\n\n\nhttps://lilianweng.github.io/posts/2018-08-12-vae/\nhttps://theaisummer.com/jax-tensorflow-pytorch/\nhttps://dmol.pub/dl/VAE.html" + "text": "Imports\n\nimport jax\nimport jax.numpy as jnp\nimport numpy as np\nimport optax\n\nimport matplotlib.pyplot as plt\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\nimport jax.random as random\nimport tensorflow_probability.substrates.jax as tfp\n\nfrom flax import linen as nn\nfrom typing import Any, Callable, Sequence\n\nimport seaborn as sns\nimport pandas as pd\n\nfrom bayes_opt import BayesianOptimization\n\n\n\n\nCreate a simple 2d dataset\n\nX = random.multivariate_normal(\n key=random.PRNGKey(0),\n shape=(100,),\n mean=jnp.array([1, 3]),\n cov=jnp.array([[1.0, -0.5], [-0.5, 2.0]]),\n)\n\n\nX.shape\n\n(100, 2)\n\n\n\nplt.scatter(X[:, 0], X[:, 1])\n# plt.gca().set_aspect(\"equal\")\n\n\n\n\n\n\n\n\n\nclass Encoder(nn.Module):\n bottleneck: int\n\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(5)(x)\n x = nn.selu(x)\n x = nn.Dense(features=self.bottleneck)(x)\n return x\n\n\nclass Decoder(nn.Module):\n out: int\n\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(5)(x)\n x = nn.selu(x)\n x = nn.Dense(features=self.out)(x)\n return x\n\n\nenc = Encoder(bottleneck=1)\n\ndec = Decoder(out=2)\n\n\nparams_enc = enc.init(random.PRNGKey(0), X)\nX_bottlenecked = enc.apply(params_enc, X)\nX_bottlenecked.shape\n\n(100, 1)\n\n\n\nprint(enc.tabulate(random.PRNGKey(0), X))\n\nprint(dec.tabulate(random.PRNGKey(0), X_bottlenecked))\n\n\n Encoder Summary \n┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ Encoder │ float32[100,2] │ float32[100,1] │ │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_0 │ Dense │ float32[100,2] │ float32[100,5] │ bias: float32[5] │\n│ │ │ │ │ kernel: float32[2,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 15 (60 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_1 │ Dense │ float32[100,5] │ float32[100,1] │ bias: float32[1] │\n│ │ │ │ │ kernel: float32[5,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 6 (24 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ │ │ │ Total │ 21 (84 B) │\n└─────────┴─────────┴────────────────┴────────────────┴──────────────────────┘\n \n Total Parameters: 21 (84 B) \n\n\n\n Decoder Summary \n┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ Decoder │ float32[100,1] │ float32[100,2] │ │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_0 │ Dense │ float32[100,1] │ float32[100,5] │ bias: float32[5] │\n│ │ │ │ │ kernel: float32[1,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 10 (40 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ Dense_1 │ Dense │ float32[100,5] │ float32[100,2] │ bias: float32[2] │\n│ │ │ │ │ kernel: float32[5,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 12 (48 B) │\n├─────────┼─────────┼────────────────┼────────────────┼──────────────────────┤\n│ │ │ │ Total │ 22 (88 B) │\n└─────────┴─────────┴────────────────┴────────────────┴──────────────────────┘\n \n Total Parameters: 22 (88 B) \n\n\n\n\n\nclass AE(nn.Module):\n bottleneck: int\n out: int\n def setup(self):\n # Alternative to @nn.compact -> explicitly define modules\n # Better for later when we want to access the encoder and decoder explicitly\n self.encoder = Encoder(bottleneck=self.bottleneck)\n self.decoder = Decoder(out=self.out)\n\n def __call__(self, x):\n\n z = self.encoder(x)\n x_hat = self.decoder(z)\n return x_hat\n\n\nbottleneck_size = 1\nout_size = X.shape[1]\nae = AE(bottleneck_size, out_size)\n\n\nae\n\nAE(\n # attributes\n bottleneck = 1\n out = 2\n)\n\n\n\nprint(ae.tabulate(random.PRNGKey(0), X))\n\n\n AE Summary \n┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ │ AE │ float32[100,2] │ float32[100,2] │ │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ encoder │ Encoder │ float32[100,2] │ float32[100,1] │ │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ encoder/Dense_0 │ Dense │ float32[100,2] │ float32[100,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[2,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 15 (60 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ encoder/Dense_1 │ Dense │ float32[100,5] │ float32[100,1] │ bias: │\n│ │ │ │ │ float32[1] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 6 (24 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ decoder │ Decoder │ float32[100,1] │ float32[100,2] │ │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ decoder/Dense_0 │ Dense │ float32[100,1] │ float32[100,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[1,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 10 (40 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ decoder/Dense_1 │ Dense │ float32[100,5] │ float32[100,2] │ bias: │\n│ │ │ │ │ float32[2] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 12 (48 B) │\n├─────────────────┼─────────┼────────────────┼────────────────┼────────────────┤\n│ │ │ │ Total │ 43 (172 B) │\n└─────────────────┴─────────┴────────────────┴────────────────┴────────────────┘\n \n Total Parameters: 43 (172 B) \n\n\n\n\n\nparams = ae.init(random.PRNGKey(0), X)\nparams\n\nFrozenDict({\n params: {\n encoder: {\n Dense_0: {\n kernel: DeviceArray([[ 0.17535934, -1.0953957 , 0.69273657, -0.26352578,\n 0.63077825],\n [ 0.36360174, -0.73782593, -0.5395247 , -0.41536337,\n -0.30090812]], dtype=float32),\n bias: DeviceArray([0., 0., 0., 0., 0.], dtype=float32),\n },\n Dense_1: {\n kernel: DeviceArray([[-0.64744544],\n [ 0.4855265 ],\n [-0.82133824],\n [ 0.62454295],\n [ 0.6013553 ]], dtype=float32),\n bias: DeviceArray([0.], dtype=float32),\n },\n },\n decoder: {\n Dense_0: {\n kernel: DeviceArray([[-0.5305567 , 1.1100855 , -0.31129056, 0.43152457,\n -0.09589562]], dtype=float32),\n bias: DeviceArray([0., 0., 0., 0., 0.], dtype=float32),\n },\n Dense_1: {\n kernel: DeviceArray([[-0.76956064, 0.13031492],\n [ 0.11736098, 0.47368795],\n [-0.12549445, -0.31066778],\n [-0.4392067 , -0.9067152 ],\n [-0.86761785, 0.42325035]], dtype=float32),\n bias: DeviceArray([0., 0.], dtype=float32),\n },\n },\n },\n})\n\n\n\nX_hat = ae.apply(params, X)\nX_hat.shape\n\n(100, 2)\n\n\n\ntry:\n ae.encoder\nexcept:\n pass\n # Trying to figure this out\n # https://github.com/google/flax/discussions/2602\n\n\n# Encoded values/latent representation\nencoded_1d = Encoder(1).apply({\"params\": params[\"params\"][\"encoder\"]}, X).flatten()\nencoded_1d\n\nDeviceArray([-2.4718695, -2.1964364, -2.6823573, -2.4936147, -1.7122931,\n -1.8346143, -2.0767107, -1.8570523, -1.7632042, -2.067935 ,\n -2.2317708, -2.14561 , -1.0023856, -2.1458383, -2.3645976,\n -1.9418356, -2.7020268, -1.6407721, -1.8281609, -2.2202983,\n -2.517499 , -2.5888596, -2.0095935, -2.4470625, -2.18571 ,\n -1.9742887, -1.8921608, -2.245328 , -0.8897901, -2.5329056,\n -2.2861118, -1.5862433, -2.2295656, -2.496296 , -2.404385 ,\n -2.0180435, -1.8416756, -1.858724 , -2.0980945, -1.777173 ,\n -2.0027544, -2.1870096, -2.44952 , -1.7563678, -1.5761943,\n -2.3097022, -2.0295165, -2.9528203, -2.2042174, -1.9090188,\n -1.8868417, -2.4206855, -2.143362 , -1.880422 , -2.5127397,\n -2.1454868, -2.0043788, -2.570388 , -2.5082102, -2.3339696,\n -1.8621875, -2.4201612, -2.561397 , -2.0498512, -1.6772006,\n -1.6392376, -2.3855271, -1.8138398, -3.3776197, -2.3745804,\n -2.6683671, -1.8609927, -1.4205931, -1.8123009, -2.236284 ,\n -2.2161927, -2.5204146, -2.0504622, -2.1548996, -1.6896895,\n -1.3192847, -2.2909331, -2.1295016, -2.0703764, -1.9394028,\n -2.041992 , -1.8279521, -1.690125 , -2.7230937, -2.3157165,\n -1.7527001, -2.2544892, -2.6310122, -2.0703619, -2.2476096,\n -1.8941168, -1.5398859, -1.5742403, -2.375471 , -1.9361446], dtype=float32)\n\n\n\ndef plot_2d_reconstruction(X, params, model, trained = False):\n X_hat = model.apply(params, X)\n plt.scatter(X[:, 0], X[:, 1], label=\"Original Data\")\n plt.scatter(X_hat[:, 0], X_hat[:, 1], label=\"Reconstructed Data\")\n if trained:\n plt.title(\"Trained\")\n else:\n plt.title(\"Untrained\")\n\n\nplot_2d_reconstruction(X, params, ae, False)\n\n\n\n\n\n\n\n\n\n\nDefine the Loss function\n\\(\\ell_2\\) penalty\n\ndiff = X - X_hat\n\n\ndiff.shape\n\n(100, 2)\n\n\n\ndiff[:5]\n\nDeviceArray([[-0.46981597, 5.271835 ],\n [ 1.6502905 , 3.6781619 ],\n [ 1.8507848 , 5.0589485 ],\n [ 2.8690844 , 4.5646677 ],\n [ 0.4905889 , 2.8893166 ]], dtype=float32)\n\n\n\n(diff**2).sum(axis=1).mean() / 2\n\nDeviceArray(7.9555416, dtype=float32)\n\n\n\n(diff**2).sum(axis=1)[:5]\n\nDeviceArray([28.01297 , 16.252333, 29.018364, 29.067837, 8.588828], dtype=float32)\n\n\n\n(jnp.linalg.norm(diff, ord=2, axis=1) ** 2).mean() / 2\n\nDeviceArray(7.955541, dtype=float32)\n\n\n\nfrom sklearn.metrics import mean_squared_error\n\n\nmean_squared_error(X, X_hat)\n\n7.9555407\n\n\n\nprint(2 * optax.l2_loss(X_hat, X).mean())\n\n\"\"\"\n\nMultplying by two\nDocstring says:\nCalculates the L2 loss for a set of predictions.\n\nNote: the 0.5 term is standard in \"Pattern Recognition and Machine Learning\"\nby Bishop, but not \"The Elements of Statistical Learning\" by Tibshirani.\n\"\"\"\n\n7.9555416\n\n\n'\\n\\nMultplying by two\\nDocstring says:\\nCalculates the L2 loss for a set of predictions.\\n\\nNote: the 0.5 term is standard in \"Pattern Recognition and Machine Learning\"\\nby Bishop, but not \"The Elements of Statistical Learning\" by Tibshirani.\\n'\n\n\n\n@jax.jit\ndef loss(params, X):\n X_hat = ae.apply(params, X)\n return 2 * optax.l2_loss(X_hat, X).mean()\n\n\nloss(params, X)\n\nDeviceArray(7.9555416, dtype=float32)\n\n\n\n\nDefining the train function\n\ndef train(\n X: jnp.array,\n optimizer: optax._src.base.GradientTransformation,\n model: nn.Module,\n key_param: jax.random.PRNGKey,\n n_iter: int=500,\n print_every: int=10\n):\n loss_array = np.zeros(n_iter)\n def loss(params, X):\n X_hat = model.apply(params, X)\n return 2 * optax.l2_loss(X_hat, X).mean()\n\n params = model.init(key_param, X)\n opt_state = optimizer.init(params)\n loss_grad_fn = jax.value_and_grad(loss)\n\n for i in range(n_iter):\n loss_val, grads = loss_grad_fn(params, X)\n loss_array[i] = loss_val.item()\n updates, opt_state = optimizer.update(grads, opt_state)\n params = optax.apply_updates(params, updates)\n if i % print_every == 0:\n print(\"Loss step {}: \".format(i), loss_val)\n return params, loss_array\n\n\noptimized_params, loss_array = train(\n X, optax.adam(learning_rate=0.1), ae, jax.random.PRNGKey(0), n_iter=30\n)\n\nLoss step 0: 7.9555416\nLoss step 10: 1.3104575\nLoss step 20: 0.544944\n\n\n\nplt.plot(loss_array)\nplt.xlabel(\"Iterations\")\n_ = plt.ylabel(\"Reconstruction loss\")\n\n\n\n\n\n\n\n\n\nplot_2d_reconstruction(X, optimized_params, ae, True)\n\n\n\n\n\n\n\n\n\nfrom sklearn import datasets\n\n\ndigits = datasets.load_digits()\n\n\nX = jnp.array(digits[\"data\"])\ny = digits[\"target\"]\n\n\nX.shape\n\n(1797, 64)\n\n\n\nplt.imshow(X[1].reshape(8, 8), cmap=\"Greys\")\ny[1]\n\n1\n\n\n\n\n\n\n\n\n\n\nbn = 2\nae_digits = AE(bn, X.shape[1])\n\nae_digits\n\nAE(\n # attributes\n bottleneck = 2\n out = 64\n)\n\n\n\nprint(ae_digits.tabulate(random.PRNGKey(0), X))\n\n\n AE Summary \n┏━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n│ │ AE │ float32[1797,… │ float32[1797,6… │ │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ encoder │ Encoder │ float32[1797,… │ float32[1797,2] │ │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ encoder/Dense… │ Dense │ float32[1797,… │ float32[1797,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[64,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 325 (1.3 KB) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ encoder/Dense… │ Dense │ float32[1797,… │ float32[1797,2] │ bias: │\n│ │ │ │ │ float32[2] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 12 (48 B) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ decoder │ Decoder │ float32[1797,… │ float32[1797,6… │ │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ decoder/Dense… │ Dense │ float32[1797,… │ float32[1797,5] │ bias: │\n│ │ │ │ │ float32[5] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[2,5] │\n│ │ │ │ │ │\n│ │ │ │ │ 15 (60 B) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ decoder/Dense… │ Dense │ float32[1797,… │ float32[1797,6… │ bias: │\n│ │ │ │ │ float32[64] │\n│ │ │ │ │ kernel: │\n│ │ │ │ │ float32[5,64] │\n│ │ │ │ │ │\n│ │ │ │ │ 384 (1.5 KB) │\n├────────────────┼─────────┼────────────────┼─────────────────┼────────────────┤\n│ │ │ │ Total │ 736 (2.9 KB) │\n└────────────────┴─────────┴────────────────┴─────────────────┴────────────────┘\n \n Total Parameters: 736 (2.9 KB) \n\n\n\n\n\nparams_digits = ae_digits.init(random.PRNGKey(0), X)\n\n\njax.tree_util.tree_map(lambda x: x.shape, params_digits)\n\nFrozenDict({\n params: {\n decoder: {\n Dense_0: {\n bias: (5,),\n kernel: (2, 5),\n },\n Dense_1: {\n bias: (64,),\n kernel: (5, 64),\n },\n },\n encoder: {\n Dense_0: {\n bias: (5,),\n kernel: (64, 5),\n },\n Dense_1: {\n bias: (2,),\n kernel: (5, 2),\n },\n },\n },\n})\n\n\n\ndef plot_encoding_2dim(encoder, params):\n assert encoder.bottleneck >= 2\n X_low = encoder.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n df = pd.DataFrame(X_low)\n df[\"label\"] = y\n sns.pairplot(df, hue=\"label\", palette=\"bright\")\n\n\n\nUntrained encodings\n\nplot_encoding_2dim(Encoder(bottleneck=bn), params_digits)\n\n\n\n\n\n\n\n\n\nX_recon = ae_digits.apply(params_digits, X)\n\n\ndef plot_orig_recon(index=0):\n fig, ax = plt.subplots(sharex=True, ncols=2)\n ax[0].imshow(X[index].reshape(8, 8), cmap=\"Greys\")\n ax[1].imshow(X_recon[index].reshape(8, 8), cmap=\"Greys\")\n ax[0].set_title(\"Original\")\n ax[1].set_title(\"Reconstructed\")\n\n\nplot_orig_recon(5)\n\n\n\n\n\n\n\n\n\noptimized_params_digits, loss_array_digits = train(\n X, optax.adam(learning_rate=0.01), ae_digits, jax.random.PRNGKey(0), n_iter=1000\n)\n\nLoss step 0: 90.91908\nLoss step 10: 62.609577\nLoss step 20: 58.390884\nLoss step 30: 53.54514\nLoss step 40: 45.062607\nLoss step 50: 33.541103\nLoss step 60: 25.167671\nLoss step 70: 21.107908\nLoss step 80: 19.424128\nLoss step 90: 18.734087\nLoss step 100: 18.47802\nLoss step 110: 18.390646\nLoss step 120: 18.352455\nLoss step 130: 18.333141\nLoss step 140: 18.321236\nLoss step 150: 18.311743\nLoss step 160: 18.3032\nLoss step 170: 18.295115\nLoss step 180: 18.287226\nLoss step 190: 18.279234\nLoss step 200: 18.270723\nLoss step 210: 18.26098\nLoss step 220: 18.2499\nLoss step 230: 18.237106\nLoss step 240: 18.221647\nLoss step 250: 18.20243\nLoss step 260: 18.177717\nLoss step 270: 18.14539\nLoss step 280: 18.105865\nLoss step 290: 18.058249\nLoss step 300: 18.000141\nLoss step 310: 17.931208\nLoss step 320: 17.84967\nLoss step 330: 17.755304\nLoss step 340: 17.65073\nLoss step 350: 17.537819\nLoss step 360: 17.418528\nLoss step 370: 17.293976\nLoss step 380: 17.164043\nLoss step 390: 17.029558\nLoss step 400: 16.89464\nLoss step 410: 16.760334\nLoss step 420: 16.626553\nLoss step 430: 16.493797\nLoss step 440: 16.362513\nLoss step 450: 16.234201\nLoss step 460: 16.11052\nLoss step 470: 15.992949\nLoss step 480: 15.883502\nLoss step 490: 15.783846\nLoss step 500: 15.694724\nLoss step 510: 15.615571\nLoss step 520: 15.54589\nLoss step 530: 15.483993\nLoss step 540: 15.427973\nLoss step 550: 15.376085\nLoss step 560: 15.326871\nLoss step 570: 15.280196\nLoss step 580: 15.23521\nLoss step 590: 15.191253\nLoss step 600: 15.149132\nLoss step 610: 15.109302\nLoss step 620: 15.071858\nLoss step 630: 15.037474\nLoss step 640: 15.005837\nLoss step 650: 14.977009\nLoss step 660: 14.950782\nLoss step 670: 14.927103\nLoss step 680: 14.905551\nLoss step 690: 14.885867\nLoss step 700: 14.867877\nLoss step 710: 14.851396\nLoss step 720: 14.836317\nLoss step 730: 14.8224125\nLoss step 740: 14.809575\nLoss step 750: 14.797547\nLoss step 760: 14.786259\nLoss step 770: 14.775562\nLoss step 780: 14.76545\nLoss step 790: 14.755904\nLoss step 800: 14.746771\nLoss step 810: 14.738021\nLoss step 820: 14.729595\nLoss step 830: 14.721415\nLoss step 840: 14.713423\nLoss step 850: 14.705618\nLoss step 860: 14.697898\nLoss step 870: 14.690201\nLoss step 880: 14.682494\nLoss step 890: 14.674812\nLoss step 900: 14.667133\nLoss step 910: 14.6593275\nLoss step 920: 14.651322\nLoss step 930: 14.643042\nLoss step 940: 14.634569\nLoss step 950: 14.625735\nLoss step 960: 14.616413\nLoss step 970: 14.6066065\nLoss step 980: 14.596094\nLoss step 990: 14.58464\n\n\n\nplt.plot(loss_array_digits)\n\n\n\n\n\n\n\n\n\n\nTrained encodings\n\nplot_encoding_2dim(Encoder(bottleneck=bn), optimized_params_digits)\n\n\n\n\n\n\n\n\n\n\nReconstruction\n\nX_recon = ae_digits.apply(optimized_params_digits, X)\nplot_orig_recon(4)\n\n\n\n\n\n\n\n\n\nX_reconstructed = ae.apply(params, X)\n\n\nerrs = jnp.square(X - X_reconstructed).sum(axis=1)\nerr_df = pd.DataFrame({\"error\": errs, \"label\": y})\nerr_df.groupby(\"label\").mean()\n\n\n\n\n\n\n\n\nerror\n\n\nlabel\n\n\n\n\n\n0\n1067.159668\n\n\n1\n1253.397217\n\n\n2\n1187.446655\n\n\n3\n730.839417\n\n\n4\n919.732239\n\n\n5\n1103.442505\n\n\n6\n913.172607\n\n\n7\n1309.424438\n\n\n8\n892.981750\n\n\n9\n891.891907\n\n\n\n\n\n\n\n\nerr_df = pd.DataFrame({\"error\": errs, \"label\": y})\n\n\nerr_df.groupby(\"label\").mean()\n\n\n\n\n\n\n\n\nerror\n\n\nlabel\n\n\n\n\n\n0\n1067.159668\n\n\n1\n1253.397217\n\n\n2\n1187.446655\n\n\n3\n730.839417\n\n\n4\n919.732239\n\n\n5\n1103.442505\n\n\n6\n913.172607\n\n\n7\n1309.424438\n\n\n8\n892.981750\n\n\n9\n891.891907\n\n\n\n\n\n\n\n\n\nConvoluational AE\n\nclass ConvEncoder(nn.Module):\n bottleneck: int\n\n @nn.compact\n def __call__(self, x):\n n = x.shape[0] # x is nx64\n x = x.reshape(n, 8, 8, 1)\n x = nn.Conv(features=4, kernel_size=(2, 2), strides=1, padding=0)(\n x\n ) # 8X8X1 -> 6x6X4\n x = nn.selu(x)\n x = nn.max_pool(x, window_shape=(2, 2), strides=(2, 2)) # 6x6x4 --> 3x3x4\n x = nn.selu(x)\n x = x.reshape(n, -1) # N X 3x3x4 -> N X 36\n x = nn.Dense(self.bottleneck)(x)\n return x\n\n\nce = ConvEncoder(2)\n#print(ce.tabulate(random.PRNGKey(0), X))\nprint(ce.tabulate(random.PRNGKey(0), X, console_kwargs={\"width\": 120}))\n\n\n ConvEncoder Summary \n┏━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ ConvEncoder │ float32[1797,64] │ float32[1797,2] │ │\n├─────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Conv_0 │ Conv │ float32[1797,8,8,1] │ float32[1797,7,7,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,1,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 20 (80 B) │\n├─────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Dense_0 │ Dense │ float32[1797,36] │ float32[1797,2] │ bias: float32[2] │\n│ │ │ │ │ kernel: float32[36,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 74 (296 B) │\n├─────────┼─────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ │ │ │ Total │ 94 (376 B) │\n└─────────┴─────────────┴─────────────────────┴─────────────────────┴──────────────────────────┘\n \n Total Parameters: 94 (376 B) \n\n\n\n\n\nclass ConvDecoder(nn.Module):\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(36)(x) # Nx2 --> Nx36\n x = nn.selu(x)\n x = x.reshape(-1, 3, 3, 4) # NX3X3X4\n x = nn.ConvTranspose(features=4, kernel_size=(2, 2), strides=(2, 2))(\n x\n ) # 3x3x4 -> 6x6X4\n x = nn.selu(x)\n x = nn.Conv(features=1, kernel_size=(1, 1), strides=1, padding=1)(\n x\n ) # 6x6x4 -> 8x8x1\n x = x.reshape(-1, 64)\n return x\n\n\ncd = ConvDecoder()\nprint(\n cd.tabulate(\n random.PRNGKey(0),\n jax.random.normal(key=jax.random.PRNGKey(0), shape=(1797, 2)),\n console_kwargs={\"width\": 120},\n )\n)\n\n\n ConvDecoder Summary \n┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ ConvDecoder │ float32[1797,2] │ float32[1797,64] │ │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Dense_0 │ Dense │ float32[1797,2] │ float32[1797,36] │ bias: float32[36] │\n│ │ │ │ │ kernel: float32[2,36] │\n│ │ │ │ │ │\n│ │ │ │ │ 108 (432 B) │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ ConvTranspose_0 │ ConvTranspose │ float32[1797,3,3,4] │ float32[1797,6,6,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,4,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 68 (272 B) │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ Conv_0 │ Conv │ float32[1797,6,6,4] │ float32[1797,8,8,1] │ bias: float32[1] │\n│ │ │ │ │ kernel: float32[1,1,4,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 5 (20 B) │\n├─────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ │ │ │ Total │ 181 (724 B) │\n└─────────────────┴───────────────┴─────────────────────┴─────────────────────┴──────────────────────────┘\n \n Total Parameters: 181 (724 B) \n\n\n\n\n\nclass ConvAE(nn.Module):\n bottleneck: int\n\n def setup(self):\n # Alternative to @nn.compact -> explicitly define modules\n # Better for later when we want to access the encoder and decoder explicitly\n self.encoder = ConvEncoder(bottleneck=self.bottleneck)\n self.decoder = ConvDecoder()\n\n def __call__(self, x):\n\n z = self.encoder(x)\n x_hat = self.decoder(z)\n return x_hat\n\n\ncae = ConvAE(2)\nprint(\n cae.tabulate(\n random.PRNGKey(0),\n X,\n console_kwargs={\"width\": 120},\n )\n)\n\n\n ConvAE Summary \n┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ path ┃ module ┃ inputs ┃ outputs ┃ params ┃\n┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n│ │ ConvAE │ float32[1797,64] │ float32[1797,64] │ │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ encoder │ ConvEncoder │ float32[1797,64] │ float32[1797,2] │ │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ encoder/Conv_0 │ Conv │ float32[1797,8,8,1] │ float32[1797,7,7,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,1,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 20 (80 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ encoder/Dense_0 │ Dense │ float32[1797,36] │ float32[1797,2] │ bias: float32[2] │\n│ │ │ │ │ kernel: float32[36,2] │\n│ │ │ │ │ │\n│ │ │ │ │ 74 (296 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder │ ConvDecoder │ float32[1797,2] │ float32[1797,64] │ │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder/Dense_0 │ Dense │ float32[1797,2] │ float32[1797,36] │ bias: float32[36] │\n│ │ │ │ │ kernel: float32[2,36] │\n│ │ │ │ │ │\n│ │ │ │ │ 108 (432 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder/ConvTranspose_0 │ ConvTranspose │ float32[1797,3,3,4] │ float32[1797,6,6,4] │ bias: float32[4] │\n│ │ │ │ │ kernel: float32[2,2,4,4] │\n│ │ │ │ │ │\n│ │ │ │ │ 68 (272 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ decoder/Conv_0 │ Conv │ float32[1797,6,6,4] │ float32[1797,8,8,1] │ bias: float32[1] │\n│ │ │ │ │ kernel: float32[1,1,4,1] │\n│ │ │ │ │ │\n│ │ │ │ │ 5 (20 B) │\n├─────────────────────────┼───────────────┼─────────────────────┼─────────────────────┼──────────────────────────┤\n│ │ │ │ Total │ 275 (1.1 KB) │\n└─────────────────────────┴───────────────┴─────────────────────┴─────────────────────┴──────────────────────────┘\n \n Total Parameters: 275 (1.1 KB) \n\n\n\n\n\nparams = cae.init(random.PRNGKey(0), X)\n\n\nplot_encoding_2dim(ConvEncoder(bottleneck=2), params)\n\n\n\n\n\n\n\n\n\noptimized_params_digits_cae, loss_array_digits_cae = train(\n X, optax.adam(learning_rate=0.01), cae, jax.random.PRNGKey(0), n_iter=1000, print_every=50\n)\n\nLoss step 0: 61.916904\nLoss step 50: 30.379993\nLoss step 100: 27.855324\nLoss step 150: 26.851124\nLoss step 200: 25.77603\nLoss step 250: 25.184359\nLoss step 300: 24.772747\nLoss step 350: 24.351847\nLoss step 400: 24.091908\nLoss step 450: 23.887573\nLoss step 500: 23.72832\nLoss step 550: 23.607725\nLoss step 600: 23.514961\nLoss step 650: 23.419945\nLoss step 700: 23.363184\nLoss step 750: 23.30127\nLoss step 800: 23.258532\nLoss step 850: 23.206999\nLoss step 900: 23.162285\nLoss step 950: 23.13027\n\n\n\nplot_encoding_2dim(ConvEncoder(bottleneck=2), optimized_params_digits_cae)\n\n\n\n\n\n\n\n\n\n\nBayesOpt for optimizing the latent dimension\n\ndef black_box_function(x, y):\n \"\"\"Function with unknown internals we wish to maximize.\n\n This is just serving as an example, for all intents and\n purposes think of the internals of this function, i.e.: the process\n which generates its output values, as unknown.\n \"\"\"\n x = int(x)\n y = int(y)\n return function_discrete(x, y)\n\n\ndef function_discrete(x, y):\n assert type(x) ==int\n return -(x**2) - (y - 1) ** 2 + 1\n\n\npbounds = {\"x\": (2, 4), \"y\": (-3, 3)}\n\n\noptimizer = BayesianOptimization(\n f=black_box_function,\n pbounds=pbounds,\n verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent\n random_state=1,\n)\n\n\noptimizer.maximize()\n\n| iter | target | x | y |\n-------------------------------------------------\n| 1 | -3.0 | 2.834 | 1.322 |\n| 2 | -7.0 | 2.0 | -1.186 |\n| 3 | -12.0 | 2.294 | -2.446 |\n| 4 | -4.0 | 2.373 | -0.9266 |\n| 5 | -4.0 | 2.794 | 0.2329 |\n| 6 | -15.0 | 4.0 | 1.331 |\n| 7 | -4.0 | 2.348 | 0.8879 |\n| 8 | -3.0 | 2.797 | 1.257 |\n| 9 | -4.0 | 2.064 | 2.229 |\n| 10 | -9.0 | 3.657 | -0.9428 |\n| 11 | -7.0 | 2.901 | 3.0 |\n| 12 | -4.0 | 2.0 | -0.1486 |\n| 13 | -31.0 | 4.0 | -3.0 |\n| 14 | -7.0 | 2.0 | 3.0 |\n| 15 | -3.0 | 2.0 | 1.539 |\n| 16 | -3.0 | 2.512 | 1.792 |\n| 17 | -19.0 | 4.0 | 3.0 |\n| 18 | -4.0 | 2.831 | -0.4655 |\n| 19 | -4.0 | 2.402 | -0.3286 |\n| 20 | -9.0 | 3.539 | 0.08748 |\n| 21 | -7.0 | 2.841 | -1.217 |\n| 22 | -4.0 | 2.764 | 2.245 |\n| 23 | -4.0 | 2.0 | 0.4436 |\n| 24 | -3.0 | 2.469 | 1.423 |\n| 25 | -3.0 | 2.0 | 1.16 |\n| 26 | -3.0 | 2.787 | 1.714 |\n| 27 | -4.0 | 2.932 | 0.7853 |\n| 28 | -3.0 | 2.647 | 1.526 |\n| 29 | -3.0 | 2.148 | 1.373 |\n| 30 | -3.0 | 2.212 | 1.795 |\n=================================================\n\n\n\noptimizer.max\n\n{'target': -3.0, 'params': {'x': 2.8340440094051482, 'y': 1.3219469606529488}}\n\n\n\n{k: int(v) for k, v in optimizer.max[\"params\"].items()}\n\n{'x': 2, 'y': 1}\n\n\n\nfunction_discrete(2, 1)\n\n-3\n\n\nLet us keep a separate validation set\n\ndef loss_model(params, X, model):\n X_hat = model.apply(params, X)\n diff = X - X_hat\n return (diff**2).sum(axis=1).mean() / X.shape[1]\n\n\nfrom functools import partial\n\ne = partial(loss_model, model=cae)\ne(params, X)\n\nDeviceArray(61.916904, dtype=float32)\n\n\n\ndef validation_loss_discrete(bn):\n assert type(bn) == int\n\n # Train the model on bn sized bottleneck\n cae = ConvAE(bn)\n loss_fn_concrete = jax.jit(partial(loss_model, model=cae))\n loss_grad_fn = jax.value_and_grad(loss_fn_concrete)\n tx = optax.adam(learning_rate=1e-2)\n params = cae.init(random.PRNGKey(0), X_train)\n opt_state = tx.init(params)\n print(f\"--------Bottleneck of Size: {bn}-------------\")\n for i in range(30):\n loss_val, grads = loss_grad_fn(params, X_train)\n updates, opt_state = tx.update(grads, opt_state)\n params = optax.apply_updates(params, updates)\n\n if i % 5 == 0:\n print(\"Loss step {}: \".format(i), loss_val)\n print(f\"--------End-------------\")\n\n # Evaluate on validation dataset\n return loss_fn_concrete(params, X_validation)\n\n\nX_train, X_validation = X[:1000], X[1000:]\n\n\nvalidation_loss_discrete(2)\n\n--------Bottleneck of Size: 2-------------\nLoss step 0: 62.27715\nLoss step 5: 58.5037\nLoss step 10: 53.984245\nLoss step 15: 49.513382\nLoss step 20: 43.078316\nLoss step 25: 38.30596\n--------End-------------\n\n\nDeviceArray(36.75615, dtype=float32)\n\n\n\ndef validation_loss_bb(bn):\n bn_int = int(bn)\n return -validation_loss_discrete(bn_int)\n\n\nvalidation_loss_bb(2.5)\n\n--------Bottleneck of Size: 2-------------\nLoss step 0: 62.27715\nLoss step 5: 58.5037\nLoss step 10: 53.984245\nLoss step 15: 49.513382\nLoss step 20: 43.078316\nLoss step 25: 38.30596\n--------End-------------\n\n\nDeviceArray(-36.75615, dtype=float32)\n\n\n\npbounds = {\"bn\": (1, 40)}\noptimizer = BayesianOptimization(\n f=validation_loss_bb,\n pbounds=pbounds,\n verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent\n random_state=1,\n)\n\n\noptimizer.maximize(n_iter=8)\n\n| iter | target | bn |\n-------------------------------------\n--------Bottleneck of Size: 17-------------\nLoss step 0: 62.85297\nLoss step 5: 52.85449\nLoss step 10: 40.903214\nLoss step 15: 35.32036\nLoss step 20: 35.3193\nLoss step 25: 33.33418\n--------End-------------\n| 1 | -32.36 | 17.26 |\n--------Bottleneck of Size: 29-------------\nLoss step 0: 64.064514\nLoss step 5: 53.85875\nLoss step 10: 47.26749\nLoss step 15: 43.828564\nLoss step 20: 41.847286\nLoss step 25: 39.23966\n--------End-------------\n| 2 | -37.29 | 29.09 |\n--------Bottleneck of Size: 1-------------\nLoss step 0: 60.969757\nLoss step 5: 58.92785\nLoss step 10: 53.683678\nLoss step 15: 49.58035\nLoss step 20: 45.86102\nLoss step 25: 44.17104\n--------End-------------\n| 3 | -42.48 | 1.004 |\n--------Bottleneck of Size: 12-------------\nLoss step 0: 63.704227\nLoss step 5: 57.338806\nLoss step 10: 49.537926\nLoss step 15: 41.210827\nLoss step 20: 38.469257\nLoss step 25: 35.276833\n--------End-------------\n| 4 | -34.07 | 12.79 |\n--------Bottleneck of Size: 6-------------\nLoss step 0: 61.450924\nLoss step 5: 55.82548\nLoss step 10: 47.88899\nLoss step 15: 40.131763\nLoss step 20: 37.62544\nLoss step 25: 35.873016\n--------End-------------\n| 5 | -34.2 | 6.723 |\n--------Bottleneck of Size: 20-------------\nLoss step 0: 61.81845\nLoss step 5: 56.358246\nLoss step 10: 51.92751\nLoss step 15: 47.312576\nLoss step 20: 42.146885\nLoss step 25: 37.025486\n--------End-------------\n| 6 | -33.86 | 20.39 |\n--------Bottleneck of Size: 40-------------\nLoss step 0: 61.5667\nLoss step 5: 49.598972\nLoss step 10: 42.639145\nLoss step 15: 39.22532\nLoss step 20: 36.597954\nLoss step 25: 34.528015\n--------End-------------\n| 7 | -32.67 | 40.0 |\n--------Bottleneck of Size: 36-------------\nLoss step 0: 62.303535\nLoss step 5: 52.075367\nLoss step 10: 44.435425\nLoss step 15: 40.889286\nLoss step 20: 39.280178\nLoss step 25: 37.09512\n--------End-------------\n| 8 | -35.77 | 36.05 |\n--------Bottleneck of Size: 9-------------\nLoss step 0: 63.35566\nLoss step 5: 52.45499\nLoss step 10: 43.281902\nLoss step 15: 37.028984\nLoss step 20: 35.006325\nLoss step 25: 33.583298\n--------End-------------\n| 9 | -33.01 | 9.596 |\n--------Bottleneck of Size: 24-------------\nLoss step 0: 62.888515\nLoss step 5: 52.035835\nLoss step 10: 42.154068\nLoss step 15: 36.804348\nLoss step 20: 34.53549\nLoss step 25: 32.37921\n--------End-------------\n| 10 | -30.08 | 24.26 |\n--------Bottleneck of Size: 25-------------\nLoss step 0: 63.406757\nLoss step 5: 50.291225\nLoss step 10: 41.73214\nLoss step 15: 38.421593\nLoss step 20: 37.0491\nLoss step 25: 34.847046\n--------End-------------\n| 11 | -33.89 | 25.81 |\n--------Bottleneck of Size: 22-------------\nLoss step 0: 62.303898\nLoss step 5: 53.713398\nLoss step 10: 47.806355\nLoss step 15: 43.550034\nLoss step 20: 42.033653\nLoss step 25: 39.68766\n--------End-------------\n| 12 | -38.51 | 22.8 |\n--------Bottleneck of Size: 24-------------\nLoss step 0: 62.888515\nLoss step 5: 52.035835\nLoss step 10: 42.154068\nLoss step 15: 36.804348\nLoss step 20: 34.53549\nLoss step 25: 32.37921\n--------End-------------\n| 13 | -30.08 | 24.3 |\n=====================================\n\n\n\noptimizer.max\n\n{'target': -30.082199096679688, 'params': {'bn': 24.25939633195359}}\n\n\n\n\nVAE\n\nclass VAE_Encoder(nn.Module):\n bottleneck: int\n\n @nn.compact\n def __call__(self, x):\n x = nn.Dense(5)(x)\n x = nn.selu(x)\n mu = nn.Dense(features=self.bottleneck)(x)\n log_std = nn.Dense(features=self.bottleneck)(x)\n return mu, log_std\n\n\ndef reparameterize(mu, log_std, key=random.PRNGKey(0), samples=1):\n std = jnp.exp(log_std)\n eps = random.normal(key=key, shape=(samples,))\n return mu + eps * std\n\n\nsamples = reparameterize(2, jnp.log(1), samples=5000)\nsns.kdeplot(samples)\nplt.title(f\"Mean:{jnp.mean(samples):0.2f}, stddev: {jnp.std(samples):0.2f}\")\n\nText(0.5, 1.0, 'Mean:2.00, stddev: 1.00')\n\n\n\n\n\n\n\n\n\n\nclass VAE(nn.Module):\n bottleneck: int\n out: int\n\n def setup(self):\n # Alternative to @nn.compact -> explicitly define modules\n # Better for later when we want to access the encoder and decoder explicitly\n self.encoder = VAE_Encoder(bottleneck=self.bottleneck)\n self.decoder = Decoder(out=self.out)\n\n def __call__(self, x, rng=random.PRNGKey(0)):\n mu, log_std = self.encoder(x)\n z = reparameterize(mu, log_std, key=rng)\n x_hat = self.decoder(z)\n return x_hat, mu, log_std\n\n\nvae = VAE(bottleneck=2, out=64)\n\n\nparams = vae.init(random.PRNGKey(10), X)\n\n\nplt.imshow(vae.apply(params, X)[0][0].reshape(8, 8))\n\n\n\n\n\n\n\n\n\nvae.apply(params, X, random.PRNGKey(10))[0][0].reshape(8, 8)\n\nDeviceArray([[ -3999.399 , 6091.6396 , -2634.2932 , 307.47302 ,\n 3932.0298 , 1823.3352 , 3852.157 , 5576.5605 ],\n [ -8809.304 , 5299.91 , 286.5227 , 1059.3925 ,\n -951.62537 , -6623.4824 , -1463.6239 , 16223.624 ],\n [ -5279.1323 , -7333.815 , -71.1485 , 5679.2773 ,\n 1384.2794 , 8326.92 , -1747.943 , -4802.341 ],\n [ 403.3739 , 13455.688 , -7414.195 , 7299.713 ,\n 1180.7408 , -328.49432 , 6619.1357 , 363.74713 ],\n [ -4376.3506 , -2045.3063 , 2618.412 , -10890.402 ,\n -3035.3848 , -3574.7527 , -5057.2593 , -1859.8529 ],\n [ -53.99241 , 2318.109 , -1323.9087 , -6801.4814 ,\n -7300.1553 , 865.4169 , 13349.937 , 865.3773 ],\n [ 37.275284, -3962.8357 , 1771.9886 , -7992.7188 ,\n 4896.562 , -17371.383 , 4737.3887 , 7307.3384 ],\n [ -221.0234 , -5475.8447 , 4189.172 , -1095.9471 ,\n -6452.915 , 3767.8381 , -10514.758 , -2311.0862 ]], dtype=float32)\n\n\n\nvae_e = VAE_Encoder(2)\nmu, log_sigma = vae_e.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n\n\ntfd = tfp.distributions\n\n\nq\n\nNameError: name 'q' is not defined\n\n\n\ntfd.kl_divergence(q, p).shape\n\n\ntfd.kl_divergence(q, p).mean()\n\n\nq.stddev()\n\n\n\nLoss\n\n@jax.jit\ndef loss_vae(params, X, rng=random.PRNGKey(0)):\n X_hat, mu, log_sigma = vae.apply(params, X, rng)\n q = tfd.Normal(loc=mu, scale=jnp.exp(log_sigma))\n p = tfd.Normal(loc=0.0, scale=1.0)\n kl_loss = tfd.kl_divergence(q, p).mean()\n\n diff = X - X_hat\n recon_loss = (diff**2).sum(axis=1).mean() / X.shape[1]\n\n return recon_loss + 0.0020 * kl_loss\n\n\nloss_vae(params, X, random.PRNGKey(4))\n\n\nimport optax\n\nlearning_rate = 0.01\ntx = optax.adam(learning_rate=learning_rate)\nopt_state = tx.init(params)\nloss_grad_fn = jax.value_and_grad(loss_vae)\n\n\nfor i in range(2001):\n rng, key = random.split(rng)\n loss_val, grads = loss_grad_fn(params, X, rng)\n updates, opt_state = tx.update(grads, opt_state)\n params = optax.apply_updates(params, updates)\n if i % 50 == 0:\n print(\"Loss step {}: \".format(i), loss_val)\n\n\nX_recon, _, _ = vae.apply(params, X)\n\n\nplot_orig_recon(8)\n\n\ndec = Decoder(out=64)\nN = 10\nx_range = jnp.linspace(-2, 2, N)\nfig, ax = plt.subplots(ncols=N, sharey=True, figsize=(20, 4))\nfor i in range(N):\n ax[i].imshow(\n dec.apply(\n {\"params\": params[\"params\"][\"decoder\"]}, jnp.array([x_range[i], 0.0])\n ).reshape(8, 8),\n cmap=\"Greys\",\n )\n\n\ndef plot_encoding_2dim_vae(encoder, params):\n assert encoder.bottleneck >= 2\n mu, log_sigma = encoder.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n df = pd.DataFrame(mu)\n df[\"label\"] = y\n sns.pairplot(df, hue=\"label\", palette=\"bright\")\n\n\nvae_enc = VAE_Encoder(2)\nmu, log_sigma = vae_enc.apply({\"params\": params[\"params\"][\"encoder\"]}, X)\n# plot_encoding_2dim_vae(VAE_Encoder(2), params)\n\n\nplot_encoding_2dim_vae(vae_enc, params)\n\n\n\nTODO\n\nregular AE: Bayesopt for latent dimension\ngeneration from regular AE\ngraph of reconstruction loss v/s latent dimension for regular AE\nGIF for walking in latent space for VAE\nReconstruction as a factor of Recon + Beta X KL\nGet the Encoder from AE object directly\nImpact of MC samples\nReconstruction v/s Expected Log Likelihood (confirm the trend is same for both)\nCleanup code so that can be reused rather than copy pasting\nSparse VAE\nAdd references\nAdd bib entry\nConsider CNNs for more realistic datasets\n\n\nhttps://lilianweng.github.io/posts/2018-08-12-vae/\nhttps://theaisummer.com/jax-tensorflow-pytorch/\nhttps://dmol.pub/dl/VAE.html" }, { "objectID": "posts/2020-03-26-gp.html", @@ -585,14 +592,14 @@ "href": "posts/2020-03-26-gp.html#air-quality-2d-map", "title": "Some experiments in Gaussian Processes Regression", "section": "Air quality 2d map", - "text": "Air quality 2d map\nNow, we will be using GPs for predicting air quality in New Delhi. See my previous post on how to get AQ data for Delhi.https://nipunbatra.github.io/blog/air%20quality/2018/06/21/aq-india-map.html\nI will be creating a function to visualise the AQ estimations using GPs based on different kernels.\nThe shapefile for Delhi can be downloaded from here.\n\nimport pandas as pd\nimport os\ndf = pd.read_csv(os.path.expanduser(\"~/Downloads/2018-04-06.csv\"))\ndf = df[(df.country=='IN')&(df.city=='Delhi')&(df.parameter=='pm25')].dropna().groupby(\"location\").mean()\n\n\ndf\n\n\n\n\n\n\n\n\n\nvalue\nlatitude\nlongitude\n\n\nlocation\n\n\n\n\n\n\n\nBurari Crossing, New Delhi - IMD\n245.583333\n28.725650\n77.201157\n\n\nCRRI Mathura Road, New Delhi - IMD\n265.666667\n28.551200\n77.273574\n\n\nDTU, New Delhi - CPCB\n214.333333\n28.750050\n77.111261\n\n\nIGI Airport Terminal - 3, New Delhi - IMD\n130.666667\n28.562776\n77.118005\n\n\nIHBAS, Dilshad Garden,New Delhi - CPCB\n212.583333\n28.680275\n77.201157\n\n\nITO, New Delhi - CPCB\n220.500000\n28.631694\n77.249439\n\n\nLodhi Road, New Delhi - IMD\n176.083333\n28.591825\n77.227307\n\n\nMandir Marg, New Delhi - DPCC\n82.000000\n28.637269\n77.200560\n\n\nNSIT Dwarka, New Delhi - CPCB\n184.583333\n28.609090\n77.032541\n\n\nNorth Campus, DU, New Delhi - IMD\n147.833333\n28.657381\n77.158545\n\n\nPusa, New Delhi - IMD\n112.000000\n28.610304\n77.099694\n\n\nR K Puram, New Delhi - DPCC\n103.600000\n28.564610\n77.167010\n\n\nShadipur, New Delhi - CPCB\n213.833333\n28.651478\n77.147311\n\n\nSirifort, New Delhi - CPCB\n222.250000\n28.550425\n77.215938\n\n\nUS Diplomatic Post: New Delhi\n46.625000\n28.635760\n77.224450\n\n\n\n\n\n\n\n\n\nimport geopandas\ngdf = geopandas.GeoDataFrame(\n df, geometry=geopandas.points_from_xy(df.longitude, df.latitude))\n\n\ngdf.plot()\n\n\n\n\n\n\n\n\n\ndef plot_air_vis(df, k, shp, title):\n m = GPy.models.GPRegression(df[['longitude','latitude']], df[['value']], k)\n m.optimize(max_iters=2000)\n y_t = np.linspace(28.38,28.9, 40)\n x_t = np.linspace(76.82, 77.4, 40)\n\n XX, YY = np.meshgrid(x_t, y_t)\n Z_pred = np.zeros_like(YY)\n Z_var = np.zeros_like(YY)\n for i in range(40):\n for j in range(40):\n Z_pred[i, j], Z_var[i, j] = m.predict_noiseless(np.array([XX[i, j], YY[i, j]]).reshape(1, 2))\n \n data = geopandas.read_file(fp)\n fig = plt.figure(figsize=(6, 6))\n plt.contourf(XX, YY, Z_pred, levels=30,alpha=0.6,cmap='Purples')\n plt.colorbar()\n gdf.plot(ax=plt.gca(),markersize=gdf['value'],color='k')\n data.plot(color='k',ax=plt.gca(),zorder=-1,alpha=0.4)\n plt.gca().set_aspect(\"equal\")\n for a in [100, 150, 200,250]:\n plt.scatter([], [], c='k', alpha=1, s=a,\n label=str(a) + '$\\mu g/m^3$')\n plt.legend(scatterpoints=1, frameon=True,\n labelspacing=1, loc='upper left',ncol=2)\n \n plt.title(title+\"\\t\"+str(m.objective_function()))\n\n\nk_2d = GPy.kern.RBF(input_dim=2, lengthscale=1)\nk_2d_rbf_2 = GPy.kern.RBF(input_dim=2, lengthscale=3)*k_2d\nk_2d_rbf_3 = GPy.kern.RBF(input_dim=2, lengthscale=3) + k_2d_rbf_2\nk_matern32 = GPy.kern.Matern32(input_dim=2)\nk_matern52 = GPy.kern.Matern52(input_dim=2)\n\nk_rbf_matern = k_matern32 * k_matern52 + k_matern32*k_2d_rbf_3\n\n\nfp=os.path.expanduser(\"~/Downloads/wards delimited.shp\")\n\n\nplot_air_vis(df, k_2d, fp,\"RBF\")\nplot_air_vis(df, k_matern32, fp,\"Matern32\")\nplot_air_vis(df, k_matern52, fp,\"matern52\")\nplot_air_vis(df, k_2d_rbf_2, fp,\"RBF*RBF\")\nplot_air_vis(df, k_2d_rbf_3, fp,\"RBF*RBF+RBF\")\nplot_air_vis(df, k_rbf_matern, fp,\"Matern32*Matern52+Matern32*RBF\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nThere you go. Till next time!" + "text": "Air quality 2d map\nNow, we will be using GPs for predicting air quality in New Delhi. See my previous post on how to get AQ data for Delhi.https://nipunbatra.github.io/blog/air%20quality/2018/06/21/aq-india-map.html\nI will be creating a function to visualise the AQ estimations using GPs based on different kernels.\nThe shapefile for Delhi can be downloaded from here.\n\nimport pandas as pd\nimport os\ndf = pd.read_csv(os.path.expanduser(\"~/Downloads/2018-04-06.csv\"))\ndf = df[(df.country=='IN')&(df.city=='Delhi')&(df.parameter=='pm25')].dropna().groupby(\"location\").mean()\n\n\ndf\n\n\n\n\n\n\n\n\nvalue\nlatitude\nlongitude\n\n\nlocation\n\n\n\n\n\n\n\nBurari Crossing, New Delhi - IMD\n245.583333\n28.725650\n77.201157\n\n\nCRRI Mathura Road, New Delhi - IMD\n265.666667\n28.551200\n77.273574\n\n\nDTU, New Delhi - CPCB\n214.333333\n28.750050\n77.111261\n\n\nIGI Airport Terminal - 3, New Delhi - IMD\n130.666667\n28.562776\n77.118005\n\n\nIHBAS, Dilshad Garden,New Delhi - CPCB\n212.583333\n28.680275\n77.201157\n\n\nITO, New Delhi - CPCB\n220.500000\n28.631694\n77.249439\n\n\nLodhi Road, New Delhi - IMD\n176.083333\n28.591825\n77.227307\n\n\nMandir Marg, New Delhi - DPCC\n82.000000\n28.637269\n77.200560\n\n\nNSIT Dwarka, New Delhi - CPCB\n184.583333\n28.609090\n77.032541\n\n\nNorth Campus, DU, New Delhi - IMD\n147.833333\n28.657381\n77.158545\n\n\nPusa, New Delhi - IMD\n112.000000\n28.610304\n77.099694\n\n\nR K Puram, New Delhi - DPCC\n103.600000\n28.564610\n77.167010\n\n\nShadipur, New Delhi - CPCB\n213.833333\n28.651478\n77.147311\n\n\nSirifort, New Delhi - CPCB\n222.250000\n28.550425\n77.215938\n\n\nUS Diplomatic Post: New Delhi\n46.625000\n28.635760\n77.224450\n\n\n\n\n\n\n\n\nimport geopandas\ngdf = geopandas.GeoDataFrame(\n df, geometry=geopandas.points_from_xy(df.longitude, df.latitude))\n\n\ngdf.plot()\n\n\n\n\n\n\n\n\n\ndef plot_air_vis(df, k, shp, title):\n m = GPy.models.GPRegression(df[['longitude','latitude']], df[['value']], k)\n m.optimize(max_iters=2000)\n y_t = np.linspace(28.38,28.9, 40)\n x_t = np.linspace(76.82, 77.4, 40)\n\n XX, YY = np.meshgrid(x_t, y_t)\n Z_pred = np.zeros_like(YY)\n Z_var = np.zeros_like(YY)\n for i in range(40):\n for j in range(40):\n Z_pred[i, j], Z_var[i, j] = m.predict_noiseless(np.array([XX[i, j], YY[i, j]]).reshape(1, 2))\n \n data = geopandas.read_file(fp)\n fig = plt.figure(figsize=(6, 6))\n plt.contourf(XX, YY, Z_pred, levels=30,alpha=0.6,cmap='Purples')\n plt.colorbar()\n gdf.plot(ax=plt.gca(),markersize=gdf['value'],color='k')\n data.plot(color='k',ax=plt.gca(),zorder=-1,alpha=0.4)\n plt.gca().set_aspect(\"equal\")\n for a in [100, 150, 200,250]:\n plt.scatter([], [], c='k', alpha=1, s=a,\n label=str(a) + '$\\mu g/m^3$')\n plt.legend(scatterpoints=1, frameon=True,\n labelspacing=1, loc='upper left',ncol=2)\n \n plt.title(title+\"\\t\"+str(m.objective_function()))\n\n\nk_2d = GPy.kern.RBF(input_dim=2, lengthscale=1)\nk_2d_rbf_2 = GPy.kern.RBF(input_dim=2, lengthscale=3)*k_2d\nk_2d_rbf_3 = GPy.kern.RBF(input_dim=2, lengthscale=3) + k_2d_rbf_2\nk_matern32 = GPy.kern.Matern32(input_dim=2)\nk_matern52 = GPy.kern.Matern52(input_dim=2)\n\nk_rbf_matern = k_matern32 * k_matern52 + k_matern32*k_2d_rbf_3\n\n\nfp=os.path.expanduser(\"~/Downloads/wards delimited.shp\")\n\n\nplot_air_vis(df, k_2d, fp,\"RBF\")\nplot_air_vis(df, k_matern32, fp,\"Matern32\")\nplot_air_vis(df, k_matern52, fp,\"matern52\")\nplot_air_vis(df, k_2d_rbf_2, fp,\"RBF*RBF\")\nplot_air_vis(df, k_2d_rbf_3, fp,\"RBF*RBF+RBF\")\nplot_air_vis(df, k_rbf_matern, fp,\"Matern32*Matern52+Matern32*RBF\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nThere you go. Till next time!" }, { "objectID": "posts/auto-pytorch.html", "href": "posts/auto-pytorch.html", "title": "AutoML PyTorch", "section": "", - "text": "In this post, we look at AutoPyTorch, a framework for automated machine learning.\nimport os\nimport tempfile as tmp\nimport warnings\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n%config InlineBackend.figure_format = 'retina'\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\nX, y = sklearn.datasets.load_diabetes(return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n)\n\n# Obtain training and validation data\nX_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(\n X_train,\n y_train,\n random_state=1,\n)\nX_train.head()\n\n\n\n\n\n\n\n\n\nage\nsex\nbmi\nbp\ns1\ns2\ns3\ns4\ns5\ns6\n\n\n\n\n52\n-0.052738\n-0.044642\n-0.009439\n-0.005671\n0.039710\n0.044719\n0.026550\n-0.002592\n-0.018118\n-0.013504\n\n\n121\n0.063504\n-0.044642\n0.017506\n0.021872\n0.008063\n0.021546\n-0.036038\n0.034309\n0.019908\n0.011349\n\n\n170\n0.023546\n0.050680\n-0.020218\n-0.036656\n-0.013953\n-0.015092\n0.059685\n-0.039493\n-0.096433\n-0.017646\n\n\n287\n0.045341\n-0.044642\n-0.006206\n-0.015999\n0.125019\n0.125198\n0.019187\n0.034309\n0.032433\n-0.005220\n\n\n397\n0.052606\n-0.044642\n-0.004050\n-0.030918\n-0.046975\n-0.058307\n-0.013948\n-0.025840\n0.036056\n0.023775\ny_train.head()\n\n52 59.0\n121 173.0\n170 47.0\n287 219.0\n397 198.0\nName: target, dtype: float64\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error\n\nrf = RandomForestRegressor(random_state=1)\nrf.fit(X_train, y_train)\ny_pred_rf = rf.predict(X_test)\nnp.sqrt(mean_squared_error(y_test, y_pred_rf))\n\n62.77500577100372\npred_df = pd.DataFrame({\"rf\": y_pred_rf, \"true\": y_test})\npred_df.head()\n\n\n\n\n\n\n\n\n\nrf\ntrue\n\n\n\n\n246\n140.76\n78.0\n\n\n425\n109.89\n152.0\n\n\n293\n161.93\n200.0\n\n\n31\n70.81\n59.0\n\n\n359\n150.91\n311.0\n# Use validation dataset to find best hyperparameters for RF\nrf = RandomForestRegressor(random_state=1)\nhyperparameters = {\"n_estimators\": [10, 100, 1000], \"max_depth\": [1, 5, 10]}\n\nfrom sklearn.model_selection import GridSearchCV\n\ngrid_search = GridSearchCV(rf, hyperparameters, cv=5, scoring=\"neg_root_mean_squared_error\")\ngrid_search.fit(X_valid, y_valid)\n\ngrid_search.best_params_\n\n\n{'max_depth': 5, 'n_estimators': 100}\n# Train the RF model using the best hyperparameters on train + validation data\n\nrf = RandomForestRegressor(**grid_search.best_params_, random_state=1)\n# Combine train and validation data\nX_train_overall = pd.concat([X_train, X_valid])\ny_train_overall = pd.concat([y_train, y_valid])\nrf.fit(X_train_overall, y_train_overall)\ny_pred_rf = rf.predict(X_test)\nnp.sqrt(mean_squared_error(y_test, y_pred_rf))\n\n61.69476644955032\napi = TabularRegressionTask()\n\n# Do an api search without any memory limit but use only MLPs\n\napi.search(\n X_train=X_train_overall,\n y_train=y_train_overall,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='r2',\n total_walltime_limit=80,\n func_eval_time_limit_secs=10,\n dataset_name=\"Diabetes\",\n memory_limit=None,\n enable_traditional_pipeline=True,\n )\n\n<autoPyTorch.api.tabular_regression.TabularRegressionTask at 0x1945be6d0>\ny_pred = api.predict(X_test)\nscore = api.score(y_pred, y_test)\nprint(score)\nnp.sqrt(mean_squared_error(y_test, y_pred))\n\n{'r2': 0.3026643977627368}\n\n\n60.33987680300709\n# Print statistics from search\nprint(api.sprint_statistics())\n\nautoPyTorch results:\n Dataset name: Diabetes\n Optimisation Metric: r2\n Best validation score: 0.4352600925944532\n Number of target algorithm runs: 13\n Number of successful target algorithm runs: 10\n Number of crashed target algorithm runs: 2\n Number of target algorithms that exceeded the time limit: 1\n Number of target algorithms that exceeded the memory limit: 0\napi.get_models_with_weights()[0]\n\n(0.62,\n MyTraditionalTabularRegressionPipeline(config='random_forest',\n dataset_properties={'categorical_columns': [],\n 'categories': [],\n 'input_shape': (10,),\n 'is_small_preprocess': True,\n 'issigned': True,\n 'issparse': False,\n 'numerical_columns': [0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9],\n 'output_shape': 1,\n 'output_type': 'continuous',\n 'target_type': 'tabular_regression',\n 'task_type': 'tabular_regression'},\n init_params={'instance': None},\n random_state=RandomState(MT19937) at 0x194150240))\napi.get_models_with_weights()[1]\n\n(0.18,\n ________________________________________\n TabularRegressionPipeline\n ________________________________________\n 0-) imputer: \n SimpleImputer\n \n 1-) variance_threshold: \n VarianceThreshold\n \n 2-) coalescer: \n NoCoalescer\n \n 3-) encoder: \n NoEncoder\n \n 4-) scaler: \n StandardScaler\n \n 5-) feature_preprocessor: \n NoFeaturePreprocessor\n \n 6-) tabular_transformer: \n TabularColumnTransformer\n \n 7-) preprocessing: \n EarlyPreprocessing\n \n 8-) network_embedding: \n autoPyTorch.pipeline NoEmbedding\n \n 9-) network_backbone: \n autoPyTorch.pipeline ShapedMLPBackbone\n \n 10-) network_head: \n autoPyTorch.pipeline FullyConnectedHead\n \n 11-) network: \n Sequential ({'random_state': RandomState(MT19937) at 0x19465E140, '_fit_requirements': [FitRequirement(name='network_head', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_backbone', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_embedding', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'device': device(type='cpu'), 'network': Sequential(\n (0): _NoEmbedding()\n (1): Sequential(\n (0): Linear(in_features=10, out_features=200, bias=True)\n (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU()\n (3): Linear(in_features=200, out_features=200, bias=True)\n (4): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (5): ReLU()\n (6): Linear(in_features=200, out_features=200, bias=True)\n (7): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (8): ReLU()\n (9): Linear(in_features=200, out_features=200, bias=True)\n (10): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (11): ReLU()\n (12): Linear(in_features=200, out_features=200, bias=True)\n )\n (2): Sequential(\n (0): Flatten(start_dim=1, end_dim=-1)\n (1): Linear(in_features=200, out_features=128, bias=True)\n (2): ReLU()\n (3): Linear(in_features=128, out_features=1, bias=True)\n )\n ), 'final_activation': None, 'is_fitted_': True})\n \n 12-) network_init: \n XavierInit\n \n 13-) optimizer: \n Adam ({'random_state': RandomState(MT19937) at 0x19465E140, '_fit_requirements': [FitRequirement(name='network', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'optimizer': Adam (\n Parameter Group 0\n amsgrad: False\n betas: (0.9, 0.9)\n capturable: False\n eps: 1e-08\n foreach: None\n lr: 0.01\n maximize: False\n weight_decay: 0.0\n ), 'lr': 0.01, 'beta1': 0.9, 'beta2': 0.9, 'weight_decay': 0.0})\n \n 14-) lr_scheduler: \n ReduceLROnPlateau\n \n 15-) data_loader: \n DataLoader\n \n 16-) trainer: \n autoPyTorch.pipeline Standard Trainer\n \n ________________________________________)\napi.get_models_with_weights()[2]\n\n(0.16,\n MyTraditionalTabularRegressionPipeline(config='svm',\n dataset_properties={'categorical_columns': [],\n 'categories': [],\n 'input_shape': (10,),\n 'is_small_preprocess': True,\n 'issigned': True,\n 'issparse': False,\n 'numerical_columns': [0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9],\n 'output_shape': 1,\n 'output_type': 'continuous',\n 'target_type': 'tabular_regression',\n 'task_type': 'tabular_regression'},\n init_params={'instance': None},\n random_state=RandomState(MT19937) at 0x194711B40))\nWhat if we fit only NNs?\napi2 = TabularRegressionTask(seed=2, ensemble_size=0)\n\napi2.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='r2',\n total_walltime_limit=40,\n func_eval_time_limit_secs=10,\n dataset_name=\"Diabetes\",\n memory_limit=None,\n enable_traditional_pipeline=False,\n )\n\n[WARNING] [2023-02-27 18:29:06,260:Client-autoPyTorch.automl_common.common.utils.backend] Directory /var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/autoPyTorch_tmp_70d9fdb4-b69e-11ed-b5ea-3c7d0a00e5d9/.autoPyTorch/ensembles does not exist\n[ERROR] [2023-02-27 18:29:06,261:Client-AutoPyTorch:Diabetes:2] No valid ensemble was created. Please check the logfile for errors. Default to the best individual estimator:[(2, 2, 5.555555555555555)]\nNoneType: None\n\n\n<autoPyTorch.api.tabular_regression.TabularRegressionTask at 0x194d3f160>\ny_pred2 = api2.predict(X_test)\nscore2 = api2.score(y_pred2, y_test)\nscore2\n\n{'r2': -0.37656772470491995}\nnp.sqrt(mean_squared_error(y_test, y_pred2))\n\n84.77782906691597\nOkay, it seems we are worse than the random forest. Let’s see what happened.\napi2.get_models_with_weights()[0]\n\n(1.0,\n ________________________________________\n TabularRegressionPipeline\n ________________________________________\n 0-) imputer: \n SimpleImputer\n \n 1-) variance_threshold: \n VarianceThreshold\n \n 2-) coalescer: \n NoCoalescer\n \n 3-) encoder: \n NoEncoder\n \n 4-) scaler: \n StandardScaler\n \n 5-) feature_preprocessor: \n NoFeaturePreprocessor\n \n 6-) tabular_transformer: \n TabularColumnTransformer\n \n 7-) preprocessing: \n EarlyPreprocessing\n \n 8-) network_embedding: \n autoPyTorch.pipeline NoEmbedding\n \n 9-) network_backbone: \n autoPyTorch.pipeline ShapedMLPBackbone\n \n 10-) network_head: \n autoPyTorch.pipeline FullyConnectedHead\n \n 11-) network: \n Sequential ({'random_state': RandomState(MT19937) at 0x194711A40, '_fit_requirements': [FitRequirement(name='network_head', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_backbone', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_embedding', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'device': device(type='cpu'), 'network': Sequential(\n (0): _NoEmbedding()\n (1): Sequential(\n (0): Linear(in_features=10, out_features=200, bias=True)\n (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU()\n (3): Linear(in_features=200, out_features=200, bias=True)\n (4): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (5): ReLU()\n (6): Linear(in_features=200, out_features=200, bias=True)\n (7): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (8): ReLU()\n (9): Linear(in_features=200, out_features=200, bias=True)\n (10): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (11): ReLU()\n (12): Linear(in_features=200, out_features=200, bias=True)\n )\n (2): Sequential(\n (0): Flatten(start_dim=1, end_dim=-1)\n (1): Linear(in_features=200, out_features=128, bias=True)\n (2): ReLU()\n (3): Linear(in_features=128, out_features=1, bias=True)\n )\n ), 'final_activation': None, 'is_fitted_': True})\n \n 12-) network_init: \n XavierInit\n \n 13-) optimizer: \n Adam ({'random_state': RandomState(MT19937) at 0x194711A40, '_fit_requirements': [FitRequirement(name='network', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'optimizer': Adam (\n Parameter Group 0\n amsgrad: False\n betas: (0.9, 0.9)\n capturable: False\n eps: 1e-08\n foreach: None\n lr: 0.01\n maximize: False\n weight_decay: 0.0\n ), 'lr': 0.01, 'beta1': 0.9, 'beta2': 0.9, 'weight_decay': 0.0})\n \n 14-) lr_scheduler: \n ReduceLROnPlateau\n \n 15-) data_loader: \n DataLoader\n \n 16-) trainer: \n autoPyTorch.pipeline Standard Trainer\n \n ________________________________________)\nimport torch\nX_train_torch = torch.from_numpy(X_train.values).float()\ny_train_torch = torch.from_numpy(y_train.values).float()\nX_val_torch = torch.from_numpy(X_valid.values).float()\ny_val_torch = torch.from_numpy(y_valid.values).float()\nX_test_torch = torch.from_numpy(X_test.values).float()\ny_test_torch = torch.from_numpy(y_test.values).float()\n# Build a simple MLP in PyTorch, train on training data and optimize on validation data\n\n\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\n\nclass Net(nn.Module):\n def __init__(self, l1 = 64, l2 = 64, l3 = 64, l2_reg = 0.0):\n super(Net, self).__init__()\n self.l2_reg = l2_reg\n self.fc1 = nn.Linear(10, l1)\n self.fc2 = nn.Linear(l1, l2)\n self.fc3 = nn.Linear(l2, l3)\n self.fc4 = nn.Linear(l3, 1)\n\n def forward(self, x):\n # Add a residual connection\n x = F.relu(self.fc1(x))\n x = F.relu(self.fc2(x))\n x = F.relu(self.fc3(x))\n x = self.fc4(x)\n return x\n \n\nnet = Net()\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(net.parameters(), lr=0.001)\n\n\n\ntrain_losses = []\nval_losses = []\ntest_losses = []\n\n\nfor epoch in range(1000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_torch)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n loss.backward()\n optimizer.step()\n\n train_losses.append(np.sqrt(loss.item()))\n val_losses.append(np.sqrt(criterion(net(X_val_torch), y_val_torch).item()))\n test_losses.append(np.sqrt(criterion(net(X_test_torch), y_test_torch).item()))\n \nprint('Finished Training')\n\nFinished Training\nplt.plot(train_losses, label=\"Train\")\nplt.plot(test_losses, label=\"Test\")\nplt.plot(val_losses, label=\"Val\")\nplt.legend()\nwith torch.no_grad():\n outputs = net(X_test_torch)\n loss = criterion(outputs, y_test_torch)\n print(f\"Test loss: {np.sqrt(loss.item())}\")\n outputs = net(X_train_torch) \n loss = criterion(outputs, y_train_torch)\n print(f\"Train loss: {np.sqrt(loss.item())}\")\n print(\"\")\n\nTest loss: 72.43299031475574\nTrain loss: 79.2251754050993\n# Fit the NN model using scaled y values\n# Using sklearn's StandardScaler to scale the y values\n\nfrom sklearn.preprocessing import StandardScaler\n\nscaler = StandardScaler()\ny_train_scaled = scaler.fit_transform(y_train.values.reshape(-1, 1))\ny_train_scaled = torch.from_numpy(y_train_scaled).float()\ny_test_scaled = scaler.transform(y_test.values.reshape(-1, 1))\ny_test_scaled = torch.from_numpy(y_test_scaled).float()\ny_valid_scaled = scaler.transform(y_valid.values.reshape(-1, 1))\ny_valid_scaled = torch.from_numpy(y_valid_scaled).float()\n\nnet = Net()\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(net.parameters(), lr=0.01)\n\ntrain_losses = []\nval_losses = []\ntest_losses = []\n\n\nfor epoch in range(4000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n loss.backward()\n optimizer.step()\n\n train_losses.append(np.sqrt(loss.item()))\n\n \nprint('Finished Training')\n\n\nFinished Training\nplt.plot(train_losses, label=\"Train\")\n# Predict the scaled y values and inverse transform them to get the original y values\n\nwith torch.no_grad():\n outputs = net(X_test_torch)\n# Inverse transform the scaled y values to get the original y values\n\ny_pred = scaler.inverse_transform(outputs.numpy())\ny_pred = y_pred.reshape(-1)\nprint(f\"Test RMSE: {np.sqrt(mean_squared_error(y_test, y_pred))}\")\n\nTest RMSE: 82.79070717489064\npd.DataFrame({\"y_test\": y_test, \"y_pred\": y_pred}).plot.scatter(x=\"y_test\", y=\"y_pred\")\n# At this point it seems our model is worse than RF, Auto model containing Catboost etc. Let us optimize our NN\n# I will use Ray Tune to optimize the hyperparameters\n\nimport ray\nfrom ray import tune\nfrom ray.tune import CLIReporter\nfrom ray.tune.schedulers import ASHAScheduler\n\ndef train_mlp(config):\n net = Net(config[\"l1\"], config[\"l2\"], config[\"l3\"], config[\"l2_reg\"])\n \n criterion = nn.MSELoss()\n \n criterion = nn.MSELoss()\n optimizer = optim.Adam(net.parameters(), lr=config[\"lr\"])\n for epoch in range(1000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add l2 regularization to loss\n l2_reg = torch.tensor(0.)\n for param in net.parameters():\n l2_reg += torch.norm(param)\n loss += config[\"l2_reg\"] * l2_reg\n loss.backward()\n optimizer.step()\n with torch.no_grad():\n outputs = net(X_val_torch)\n # Inverse transform the scaled y values to get the original y values\n y_pred = scaler.inverse_transform(outputs.numpy())\n loss = np.sqrt(mean_squared_error(y_valid, y_pred))\n return {\"loss\": loss, \"status\": \"ok\"}\nray.shutdown()\n\nray.init()\nscheduler = ASHAScheduler(\n metric=\"loss\",\n mode=\"min\",\n max_t=100,\n grace_period=1,\n reduction_factor=2)\n\nreporter = CLIReporter(\n parameter_columns=[\"l1\", \"l2\", \"l3\", \"lr\", \"l2_reg\"],\n metric_columns=[\"loss\", \"training_iteration\"])\n\nanalysis = tune.run(\n train_mlp,\n resources_per_trial={\"cpu\": 1},\n config={\n \"l1\": tune.choice([2, 4, 16, 32, 64, 128, 256]),\n \"l2\": tune.choice([2, 4, 16, 32, 64, 128, 256]),\n \"l3\": tune.choice([2, 4, 16, 32, 64, 128, 256]),\n \"l2_reg\": tune.loguniform(1e-4, 1.0),\n \"lr\": tune.loguniform(1e-4, 1.0),\n },\n num_samples=50,\n scheduler=scheduler,\n progress_reporter=reporter)\n\n\n\n\n Trial Progress\n \n\n\n\nTrial name\ndate\ndone\nepisodes_total\nexperiment_id\nhostname\niterations_since_restore\nloss\nnode_ip\npid\nstatus\ntime_since_restore\ntime_this_iter_s\ntime_total_s\ntimestamp\ntimesteps_since_restore\ntimesteps_total\ntraining_iteration\ntrial_id\nwarmup_time\n\n\n\n\ntrain_mlp_c8d42_00000\n2023-02-28_10-08-51\nTrue\n\n684a45c7048544b384d756bd20117055\nNipuns-iMac-8.local\n1\n76.2275\n127.0.0.1\n99588\nok\n2.44738\n2.44738\n2.44738\n1677559131\n0\n\n1\nc8d42_00000\n0.00341606\n\n\ntrain_mlp_c8d42_00001\n2023-02-28_10-08-59\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n76.2183\n127.0.0.1\n99594\nok\n4.5987\n4.5987\n4.5987\n1677559139\n0\n\n1\nc8d42_00001\n0.00891805\n\n\ntrain_mlp_c8d42_00002\n2023-02-28_10-08-58\nTrue\n\nf1614c13b37a4966adb785c911a54c11\nNipuns-iMac-8.local\n1\n76.2261\n127.0.0.1\n99595\nok\n2.89705\n2.89705\n2.89705\n1677559138\n0\n\n1\nc8d42_00002\n0.0356119\n\n\ntrain_mlp_c8d42_00003\n2023-02-28_10-08-57\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n51.6545\n127.0.0.1\n99596\nok\n2.83387\n2.83387\n2.83387\n1677559137\n0\n\n1\nc8d42_00003\n0.00670218\n\n\ntrain_mlp_c8d42_00004\n2023-02-28_10-08-57\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n65.3017\n127.0.0.1\n99597\nok\n2.6755\n2.6755\n2.6755\n1677559137\n0\n\n1\nc8d42_00004\n0.00920701\n\n\ntrain_mlp_c8d42_00005\n2023-02-28_10-08-59\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n76.1747\n127.0.0.1\n99598\nok\n4.36245\n4.36245\n4.36245\n1677559139\n0\n\n1\nc8d42_00005\n0.00544381\n\n\ntrain_mlp_c8d42_00006\n2023-02-28_10-08-59\nTrue\n\n6b794ba131304c9e841b2d671a1a4693\nNipuns-iMac-8.local\n1\n76.2181\n127.0.0.1\n99601\nok\n4.51009\n4.51009\n4.51009\n1677559139\n0\n\n1\nc8d42_00006\n0.00939798\n\n\ntrain_mlp_c8d42_00007\n2023-02-28_10-09-06\nTrue\n\n38d637c01fcb43d3be8db783cb837ea2\nNipuns-iMac-8.local\n1\n76.7874\n127.0.0.1\n99602\nok\n11.1744\n11.1744\n11.1744\n1677559146\n0\n\n1\nc8d42_00007\n0.012325\n\n\ntrain_mlp_c8d42_00008\n2023-02-28_10-09-00\nTrue\n\na15c3445ce1e4ffeba2eabf9098820f2\nNipuns-iMac-8.local\n1\n76.1934\n127.0.0.1\n99604\nok\n4.98043\n4.98043\n4.98043\n1677559140\n0\n\n1\nc8d42_00008\n0.010926\n\n\ntrain_mlp_c8d42_00009\n2023-02-28_10-09-00\nTrue\n\n9de7e974434443f6a12e1dbc614a8582\nNipuns-iMac-8.local\n1\n68.2169\n127.0.0.1\n99612\nok\n5.00434\n5.00434\n5.00434\n1677559140\n0\n\n1\nc8d42_00009\n0.00496888\n\n\ntrain_mlp_c8d42_00010\n2023-02-28_10-09-00\nTrue\n\nd7fa3bf71c4d44dd90e2988c0e59687a\nNipuns-iMac-8.local\n1\n76.2198\n127.0.0.1\n99613\nok\n5.70828\n5.70828\n5.70828\n1677559140\n0\n\n1\nc8d42_00010\n0.00938773\n\n\ntrain_mlp_c8d42_00011\n2023-02-28_10-09-03\nTrue\n\n6e08564fbc6d4f5c9bea59429a196a5e\nNipuns-iMac-8.local\n1\n52.2075\n127.0.0.1\n99614\nok\n8.02228\n8.02228\n8.02228\n1677559143\n0\n\n1\nc8d42_00011\n0.00922298\n\n\ntrain_mlp_c8d42_00012\n2023-02-28_10-08-59\nTrue\n\n98349fab3a1b4027bc14d3ce239251fd\nNipuns-iMac-8.local\n1\n76.1125\n127.0.0.1\n99615\nok\n4.87773\n4.87773\n4.87773\n1677559139\n0\n\n1\nc8d42_00012\n0.00547719\n\n\ntrain_mlp_c8d42_00013\n2023-02-28_10-08-58\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n76.2163\n127.0.0.1\n99616\nok\n3.31126\n3.31126\n3.31126\n1677559138\n0\n\n1\nc8d42_00013\n0.0112782\n\n\ntrain_mlp_c8d42_00014\n2023-02-28_10-09-00\nTrue\n\ne4c8bef4a7e246c19960353c87f60541\nNipuns-iMac-8.local\n1\n76.2002\n127.0.0.1\n99617\nok\n5.44697\n5.44697\n5.44697\n1677559140\n0\n\n1\nc8d42_00014\n0.0142629\n\n\ntrain_mlp_c8d42_00015\n2023-02-28_10-09-01\nTrue\n\nf91e68c1ff9a486f9f74c54403e236f4\nNipuns-iMac-8.local\n1\n51.5808\n127.0.0.1\n99618\nok\n5.94224\n5.94224\n5.94224\n1677559141\n0\n\n1\nc8d42_00015\n0.00973487\n\n\ntrain_mlp_c8d42_00016\n2023-02-28_10-08-59\nTrue\n\n684a45c7048544b384d756bd20117055\nNipuns-iMac-8.local\n1\n85.4019\n127.0.0.1\n99588\nok\n4.64298\n4.64298\n4.64298\n1677559139\n0\n\n1\nc8d42_00016\n0.00341606\n\n\ntrain_mlp_c8d42_00017\n2023-02-28_10-09-01\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n76.2217\n127.0.0.1\n99597\nok\n3.26168\n3.26168\n3.26168\n1677559141\n0\n\n1\nc8d42_00017\n0.00920701\n\n\ntrain_mlp_c8d42_00018\n2023-02-28_10-09-00\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n52.4798\n127.0.0.1\n99596\nok\n2.94631\n2.94631\n2.94631\n1677559140\n0\n\n1\nc8d42_00018\n0.00670218\n\n\ntrain_mlp_c8d42_00019\n2023-02-28_10-09-10\nTrue\n\nf1614c13b37a4966adb785c911a54c11\nNipuns-iMac-8.local\n1\n51.5559\n127.0.0.1\n99595\nok\n12.1603\n12.1603\n12.1603\n1677559150\n0\n\n1\nc8d42_00019\n0.0356119\n\n\ntrain_mlp_c8d42_00020\n2023-02-28_10-09-04\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n54.8401\n127.0.0.1\n99616\nok\n5.67631\n5.67631\n5.67631\n1677559144\n0\n\n1\nc8d42_00020\n0.0112782\n\n\ntrain_mlp_c8d42_00021\n2023-02-28_10-09-03\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n80.9971\n127.0.0.1\n99598\nok\n4.02288\n4.02288\n4.02288\n1677559143\n0\n\n1\nc8d42_00021\n0.00544381\n\n\ntrain_mlp_c8d42_00022\n2023-02-28_10-09-04\nTrue\n\n6b794ba131304c9e841b2d671a1a4693\nNipuns-iMac-8.local\n1\n62.7729\n127.0.0.1\n99601\nok\n5.19184\n5.19184\n5.19184\n1677559144\n0\n\n1\nc8d42_00022\n0.00939798\n\n\ntrain_mlp_c8d42_00023\n2023-02-28_10-09-03\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n52.487\n127.0.0.1\n99594\nok\n3.83576\n3.83576\n3.83576\n1677559143\n0\n\n1\nc8d42_00023\n0.00891805\n\n\ntrain_mlp_c8d42_00024\n2023-02-28_10-09-09\nTrue\n\n684a45c7048544b384d756bd20117055\nNipuns-iMac-8.local\n1\n76.1853\n127.0.0.1\n99588\nok\n10.1193\n10.1193\n10.1193\n1677559149\n0\n\n1\nc8d42_00024\n0.00341606\n\n\ntrain_mlp_c8d42_00025\n2023-02-28_10-09-04\nTrue\n\n98349fab3a1b4027bc14d3ce239251fd\nNipuns-iMac-8.local\n1\n76.2173\n127.0.0.1\n99615\nok\n4.14031\n4.14031\n4.14031\n1677559144\n0\n\n1\nc8d42_00025\n0.00547719\n\n\ntrain_mlp_c8d42_00026\n2023-02-28_10-09-06\nTrue\n\na15c3445ce1e4ffeba2eabf9098820f2\nNipuns-iMac-8.local\n1\n68.3927\n127.0.0.1\n99604\nok\n6.18441\n6.18441\n6.18441\n1677559146\n0\n\n1\nc8d42_00026\n0.010926\n\n\ntrain_mlp_c8d42_00027\n2023-02-28_10-09-03\nTrue\n\n9de7e974434443f6a12e1dbc614a8582\nNipuns-iMac-8.local\n1\n76.218\n127.0.0.1\n99612\nok\n3.01754\n3.01754\n3.01754\n1677559143\n0\n\n1\nc8d42_00027\n0.00496888\n\n\ntrain_mlp_c8d42_00028\n2023-02-28_10-09-06\nTrue\n\ne4c8bef4a7e246c19960353c87f60541\nNipuns-iMac-8.local\n1\n66.4336\n127.0.0.1\n99617\nok\n5.79445\n5.79445\n5.79445\n1677559146\n0\n\n1\nc8d42_00028\n0.0142629\n\n\ntrain_mlp_c8d42_00029\n2023-02-28_10-09-03\nTrue\n\nd7fa3bf71c4d44dd90e2988c0e59687a\nNipuns-iMac-8.local\n1\n76.2164\n127.0.0.1\n99613\nok\n2.94911\n2.94911\n2.94911\n1677559143\n0\n\n1\nc8d42_00029\n0.00938773\n\n\ntrain_mlp_c8d42_00030\n2023-02-28_10-09-05\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n75.2123\n127.0.0.1\n99596\nok\n4.99429\n4.99429\n4.99429\n1677559145\n0\n\n1\nc8d42_00030\n0.00670218\n\n\ntrain_mlp_c8d42_00031\n2023-02-28_10-09-06\nTrue\n\nf91e68c1ff9a486f9f74c54403e236f4\nNipuns-iMac-8.local\n1\n51.925\n127.0.0.1\n99618\nok\n4.96918\n4.96918\n4.96918\n1677559146\n0\n\n1\nc8d42_00031\n0.00973487\n\n\ntrain_mlp_c8d42_00032\n2023-02-28_10-09-04\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n76.4117\n127.0.0.1\n99597\nok\n3.24968\n3.24968\n3.24968\n1677559144\n0\n\n1\nc8d42_00032\n0.00920701\n\n\ntrain_mlp_c8d42_00033\n2023-02-28_10-09-08\nTrue\n\n6e08564fbc6d4f5c9bea59429a196a5e\nNipuns-iMac-8.local\n1\n70.9285\n127.0.0.1\n99614\nok\n5.80603\n5.80603\n5.80603\n1677559148\n0\n\n1\nc8d42_00033\n0.00922298\n\n\ntrain_mlp_c8d42_00034\n2023-02-28_10-09-08\nTrue\n\n9de7e974434443f6a12e1dbc614a8582\nNipuns-iMac-8.local\n1\n51.9675\n127.0.0.1\n99612\nok\n5.20552\n5.20552\n5.20552\n1677559148\n0\n\n1\nc8d42_00034\n0.00496888\n\n\ntrain_mlp_c8d42_00035\n2023-02-28_10-09-07\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n76.1776\n127.0.0.1\n99598\nok\n3.86612\n3.86612\n3.86612\n1677559147\n0\n\n1\nc8d42_00035\n0.00544381\n\n\ntrain_mlp_c8d42_00036\n2023-02-28_10-09-07\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n76.2167\n127.0.0.1\n99594\nok\n3.56161\n3.56161\n3.56161\n1677559147\n0\n\n1\nc8d42_00036\n0.00891805\n\n\ntrain_mlp_c8d42_00037\n2023-02-28_10-09-07\nTrue\n\nd7fa3bf71c4d44dd90e2988c0e59687a\nNipuns-iMac-8.local\n1\n51.8721\n127.0.0.1\n99613\nok\n4.08222\n4.08222\n4.08222\n1677559147\n0\n\n1\nc8d42_00037\n0.00938773\n\n\ntrain_mlp_c8d42_00038\n2023-02-28_10-09-10\nTrue\n\n98349fab3a1b4027bc14d3ce239251fd\nNipuns-iMac-8.local\n1\n76.2166\n127.0.0.1\n99615\nok\n6.54892\n6.54892\n6.54892\n1677559150\n0\n\n1\nc8d42_00038\n0.00547719\n\n\ntrain_mlp_c8d42_00039\n2023-02-28_10-09-07\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n71.0616\n127.0.0.1\n99616\nok\n3.14067\n3.14067\n3.14067\n1677559147\n0\n\n1\nc8d42_00039\n0.0112782\n\n\ntrain_mlp_c8d42_00040\n2023-02-28_10-09-07\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n78.2332\n127.0.0.1\n99597\nok\n3.1038\n3.1038\n3.1038\n1677559147\n0\n\n1\nc8d42_00040\n0.00920701\n\n\ntrain_mlp_c8d42_00041\n2023-02-28_10-09-09\nTrue\n\n6b794ba131304c9e841b2d671a1a4693\nNipuns-iMac-8.local\n1\n76.2681\n127.0.0.1\n99601\nok\n4.69843\n4.69843\n4.69843\n1677559149\n0\n\n1\nc8d42_00041\n0.00939798\n\n\ntrain_mlp_c8d42_00042\n2023-02-28_10-09-09\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n64.8587\n127.0.0.1\n99596\nok\n3.2481\n3.2481\n3.2481\n1677559149\n0\n\n1\nc8d42_00042\n0.00670218\n\n\ntrain_mlp_c8d42_00043\n2023-02-28_10-09-09\nTrue\n\nf91e68c1ff9a486f9f74c54403e236f4\nNipuns-iMac-8.local\n1\n76.2166\n127.0.0.1\n99618\nok\n2.99345\n2.99345\n2.99345\n1677559149\n0\n\n1\nc8d42_00043\n0.00973487\n\n\ntrain_mlp_c8d42_00044\n2023-02-28_10-09-09\nTrue\n\n38d637c01fcb43d3be8db783cb837ea2\nNipuns-iMac-8.local\n1\n76.2244\n127.0.0.1\n99602\nok\n3.30911\n3.30911\n3.30911\n1677559149\n0\n\n1\nc8d42_00044\n0.012325\n\n\ntrain_mlp_c8d42_00045\n2023-02-28_10-09-10\nTrue\n\na15c3445ce1e4ffeba2eabf9098820f2\nNipuns-iMac-8.local\n1\n51.9337\n127.0.0.1\n99604\nok\n3.84642\n3.84642\n3.84642\n1677559150\n0\n\n1\nc8d42_00045\n0.010926\n\n\ntrain_mlp_c8d42_00046\n2023-02-28_10-09-10\nTrue\n\ne4c8bef4a7e246c19960353c87f60541\nNipuns-iMac-8.local\n1\n76.2167\n127.0.0.1\n99617\nok\n3.67529\n3.67529\n3.67529\n1677559150\n0\n\n1\nc8d42_00046\n0.0142629\n\n\ntrain_mlp_c8d42_00047\n2023-02-28_10-09-09\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n73.6947\n127.0.0.1\n99594\nok\n2.20929\n2.20929\n2.20929\n1677559149\n0\n\n1\nc8d42_00047\n0.00891805\n\n\ntrain_mlp_c8d42_00048\n2023-02-28_10-09-09\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n76.2164\n127.0.0.1\n99616\nok\n2.29378\n2.29378\n2.29378\n1677559149\n0\n\n1\nc8d42_00048\n0.0112782\n\n\ntrain_mlp_c8d42_00049\n2023-02-28_10-09-09\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n60.3637\n127.0.0.1\n99598\nok\n2.2852\n2.2852\n2.2852\n1677559149\n0\n\n1\nc8d42_00049\n0.00544381\n\n\n\n\n\n\n\n\n\n2023-02-28 10:09:11,027 INFO tune.py:762 -- Total run time: 26.09 seconds (25.84 seconds for the tuning loop).\n# Print the table for all the trials\ndf = analysis.results_df\n\n# Sort the table by loss\ndf.sort_values(by=\"loss\", inplace=True)\n\n# Show only loss, config/l1, config/l2, config/l3, config/lr, training_iteration\ndf[[\"loss\", \"config/l1\", \"config/l2\", \"config/l3\", \"config/lr\", \"config/l2_reg\"]]\n\n\n\n\n\n\n\n\n\nloss\nconfig/l1\nconfig/l2\nconfig/l3\nconfig/lr\nconfig/l2_reg\n\n\ntrial_id\n\n\n\n\n\n\n\n\n\n\nc8d42_00019\n51.555930\n256\n256\n128\n0.000125\n0.027615\n\n\nc8d42_00015\n51.580817\n4\n64\n256\n0.003980\n0.015450\n\n\nc8d42_00003\n51.654479\n2\n64\n4\n0.000991\n0.015054\n\n\nc8d42_00037\n51.872128\n128\n32\n32\n0.000114\n0.005181\n\n\nc8d42_00031\n51.925027\n128\n64\n32\n0.000103\n0.001919\n\n\nc8d42_00045\n51.933658\n64\n256\n16\n0.000139\n0.000221\n\n\nc8d42_00034\n51.967532\n2\n128\n128\n0.000371\n0.000860\n\n\nc8d42_00011\n52.207469\n16\n256\n128\n0.000134\n0.001002\n\n\nc8d42_00018\n52.479806\n2\n16\n4\n0.002317\n0.000166\n\n\nc8d42_00023\n52.486995\n64\n64\n16\n0.000214\n0.025084\n\n\nc8d42_00020\n54.840118\n2\n128\n128\n0.001178\n0.001264\n\n\nc8d42_00049\n60.363672\n4\n128\n16\n0.003238\n0.000639\n\n\nc8d42_00022\n62.772853\n256\n4\n64\n0.000419\n0.000484\n\n\nc8d42_00042\n64.858708\n128\n32\n2\n0.001164\n0.000313\n\n\nc8d42_00004\n65.301650\n4\n2\n32\n0.071916\n0.000168\n\n\nc8d42_00028\n66.433568\n128\n64\n128\n0.013376\n0.002368\n\n\nc8d42_00009\n68.216911\n32\n256\n2\n0.000931\n0.001888\n\n\nc8d42_00026\n68.392660\n2\n256\n64\n0.025460\n0.000803\n\n\nc8d42_00033\n70.928476\n2\n256\n64\n0.016914\n0.000214\n\n\nc8d42_00039\n71.061583\n16\n4\n64\n0.007347\n0.000135\n\n\nc8d42_00047\n73.694689\n4\n2\n32\n0.000104\n0.001308\n\n\nc8d42_00030\n75.212274\n16\n256\n2\n0.004354\n0.000746\n\n\nc8d42_00012\n76.112538\n128\n128\n4\n0.226784\n0.140003\n\n\nc8d42_00005\n76.174661\n4\n4\n256\n0.086190\n0.563438\n\n\nc8d42_00035\n76.177571\n64\n4\n128\n0.166371\n0.330122\n\n\nc8d42_00024\n76.185294\n32\n256\n256\n0.159865\n0.671139\n\n\nc8d42_00008\n76.193428\n256\n32\n16\n0.113514\n0.062898\n\n\nc8d42_00014\n76.200245\n32\n128\n128\n0.561836\n0.000226\n\n\nc8d42_00013\n76.216345\n64\n16\n64\n0.000173\n0.194086\n\n\nc8d42_00048\n76.216390\n64\n16\n64\n0.301651\n0.000141\n\n\nc8d42_00029\n76.216436\n16\n2\n2\n0.085926\n0.000206\n\n\nc8d42_00038\n76.216618\n16\n256\n256\n0.005644\n0.210313\n\n\nc8d42_00043\n76.216619\n64\n16\n64\n0.000284\n0.433444\n\n\nc8d42_00046\n76.216670\n256\n64\n2\n0.000617\n0.181541\n\n\nc8d42_00036\n76.216674\n2\n64\n64\n0.680740\n0.005030\n\n\nc8d42_00025\n76.217288\n16\n128\n16\n0.002700\n0.051269\n\n\nc8d42_00027\n76.218050\n2\n4\n32\n0.001925\n0.194493\n\n\nc8d42_00006\n76.218069\n256\n2\n64\n0.003326\n0.233648\n\n\nc8d42_00001\n76.218267\n2\n16\n256\n0.122404\n0.001683\n\n\nc8d42_00010\n76.219815\n128\n128\n64\n0.006478\n0.178694\n\n\nc8d42_00017\n76.221708\n32\n32\n16\n0.004100\n0.520138\n\n\nc8d42_00044\n76.224417\n256\n2\n2\n0.019401\n0.077370\n\n\nc8d42_00002\n76.226116\n16\n2\n64\n0.000326\n0.028026\n\n\nc8d42_00000\n76.227517\n4\n16\n128\n0.020000\n0.307903\n\n\nc8d42_00041\n76.268058\n256\n32\n64\n0.045152\n0.252533\n\n\nc8d42_00032\n76.411740\n32\n32\n2\n0.541939\n0.193151\n\n\nc8d42_00007\n76.787387\n256\n256\n2\n0.616285\n0.114236\n\n\nc8d42_00040\n78.233233\n64\n4\n16\n0.164726\n0.000119\n\n\nc8d42_00021\n80.997064\n128\n16\n32\n0.001292\n0.002832\n\n\nc8d42_00016\n85.401864\n32\n16\n256\n0.055791\n0.000965\n# Print the best hyperparameters\n\nanalysis.get_best_config(metric=\"loss\", mode=\"min\")\n\n{'l1': 256,\n 'l2': 256,\n 'l3': 128,\n 'l2_reg': 0.027614886800457164,\n 'lr': 0.00012453571993239395}\n# Perform the final test on the test set\n\nbest_config = analysis.get_best_config(metric=\"loss\", mode=\"min\")\n\n# Use the best hyperparameters to train the model\nnet = Net(best_config[\"l1\"], best_config[\"l2\"], best_config[\"l3\"], best_config[\"l2_reg\"])\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(net.parameters(), lr=best_config[\"lr\"])\nfor epoch in range(1000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n\n \n loss.backward()\n optimizer.step()\n\nwith torch.no_grad():\n outputs = net(X_test_torch)\n # Inverse transform the scaled y values to get the original y values\n y_pred = scaler.inverse_transform(outputs.numpy())\n loss = np.sqrt(mean_squared_error(y_test, y_pred))\n print(f\"Test RMSE: {loss}\")\n print(\"\")\n \n\nTest RMSE: 54.06271747689579\npd.DataFrame({\"y_test\": y_test, \"y_pred\": y_pred.reshape(-1)})\n\n\n\n\n\n\n\n\n\ny_test\ny_pred\n\n\n\n\n246\n78.0\n107.765800\n\n\n425\n152.0\n110.693916\n\n\n293\n200.0\n175.535324\n\n\n31\n59.0\n82.328522\n\n\n359\n311.0\n176.440598\n\n\n...\n...\n...\n\n\n117\n281.0\n246.310699\n\n\n139\n281.0\n245.451309\n\n\n218\n214.0\n118.843346\n\n\n93\n96.0\n77.727440\n\n\n420\n146.0\n134.773529\n\n\n\n\n111 rows × 2 columns\n# Thus far it seems even with hyperparameter tuning we are unable to match the performance of ensemble models. \n\n# Get the top 5 configurations\n\ndf[['loss', 'config/l1', 'config/l2', 'config/l3', 'config/lr', 'config/l2_reg']].head(5)\n\n\n\n\n\n\n\n\n\nloss\nconfig/l1\nconfig/l2\nconfig/l3\nconfig/lr\nconfig/l2_reg\n\n\ntrial_id\n\n\n\n\n\n\n\n\n\n\nc8d42_00019\n51.555930\n256\n256\n128\n0.000125\n0.027615\n\n\nc8d42_00015\n51.580817\n4\n64\n256\n0.003980\n0.015450\n\n\nc8d42_00003\n51.654479\n2\n64\n4\n0.000991\n0.015054\n\n\nc8d42_00037\n51.872128\n128\n32\n32\n0.000114\n0.005181\n\n\nc8d42_00031\n51.925027\n128\n64\n32\n0.000103\n0.001919\nconfig_list_of_dicts = df[['config/l1', 'config/l2', 'config/l3', 'config/lr', 'config/l2_reg']].head(5).to_dict('records')\n\n# Train an ensemble of 5 models using the top 5 configurations\n\nensemble = []\nfor config in config_list_of_dicts:\n net = Net(config[\"config/l1\"], config[\"config/l2\"], config[\"config/l3\"], config[\"config/l2_reg\"])\n criterion = nn.MSELoss()\n optimizer = optim.Adam(net.parameters(), lr=config[\"config/lr\"])\n for epoch in range(2000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n loss.backward()\n optimizer.step()\n ensemble.append(net)\n# Get the predictions from the ensemble\n\nensemble_preds = []\nfor net in ensemble:\n with torch.no_grad():\n outputs = net(X_test_torch)\n # Scale the predictions back to the original scale\n outputs = scaler.inverse_transform(outputs.numpy())\n ensemble_preds.append(outputs)\n \nensemble_preds = np.array(ensemble_preds)\n\n# Get the mean of the predictions\n\nensemble_preds_mean = ensemble_preds.mean(axis=0)\n\n# Get the RMSE of the ensemble\n\ncriterion = nn.MSELoss()\nloss = criterion(torch.tensor(ensemble_preds_mean), y_test_torch)\nprint(f\"Test loss: {np.sqrt(loss.item())}\")\n\nTest loss: 83.04458927945727" + "text": "In this post, we look at AutoPyTorch, a framework for automated machine learning.\nimport os\nimport tempfile as tmp\nimport warnings\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n%config InlineBackend.figure_format = 'retina'\n\nimport sklearn.datasets\nimport sklearn.model_selection\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\nfrom autoPyTorch.api.tabular_regression import TabularRegressionTask\nX, y = sklearn.datasets.load_diabetes(return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X,\n y,\n random_state=1,\n)\n\n# Obtain training and validation data\nX_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(\n X_train,\n y_train,\n random_state=1,\n)\nX_train.head()\n\n\n\n\n\n\n\n\nage\nsex\nbmi\nbp\ns1\ns2\ns3\ns4\ns5\ns6\n\n\n\n\n52\n-0.052738\n-0.044642\n-0.009439\n-0.005671\n0.039710\n0.044719\n0.026550\n-0.002592\n-0.018118\n-0.013504\n\n\n121\n0.063504\n-0.044642\n0.017506\n0.021872\n0.008063\n0.021546\n-0.036038\n0.034309\n0.019908\n0.011349\n\n\n170\n0.023546\n0.050680\n-0.020218\n-0.036656\n-0.013953\n-0.015092\n0.059685\n-0.039493\n-0.096433\n-0.017646\n\n\n287\n0.045341\n-0.044642\n-0.006206\n-0.015999\n0.125019\n0.125198\n0.019187\n0.034309\n0.032433\n-0.005220\n\n\n397\n0.052606\n-0.044642\n-0.004050\n-0.030918\n-0.046975\n-0.058307\n-0.013948\n-0.025840\n0.036056\n0.023775\ny_train.head()\n\n52 59.0\n121 173.0\n170 47.0\n287 219.0\n397 198.0\nName: target, dtype: float64\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error\n\nrf = RandomForestRegressor(random_state=1)\nrf.fit(X_train, y_train)\ny_pred_rf = rf.predict(X_test)\nnp.sqrt(mean_squared_error(y_test, y_pred_rf))\n\n62.77500577100372\npred_df = pd.DataFrame({\"rf\": y_pred_rf, \"true\": y_test})\npred_df.head()\n\n\n\n\n\n\n\n\nrf\ntrue\n\n\n\n\n246\n140.76\n78.0\n\n\n425\n109.89\n152.0\n\n\n293\n161.93\n200.0\n\n\n31\n70.81\n59.0\n\n\n359\n150.91\n311.0\n# Use validation dataset to find best hyperparameters for RF\nrf = RandomForestRegressor(random_state=1)\nhyperparameters = {\"n_estimators\": [10, 100, 1000], \"max_depth\": [1, 5, 10]}\n\nfrom sklearn.model_selection import GridSearchCV\n\ngrid_search = GridSearchCV(rf, hyperparameters, cv=5, scoring=\"neg_root_mean_squared_error\")\ngrid_search.fit(X_valid, y_valid)\n\ngrid_search.best_params_\n\n\n{'max_depth': 5, 'n_estimators': 100}\n# Train the RF model using the best hyperparameters on train + validation data\n\nrf = RandomForestRegressor(**grid_search.best_params_, random_state=1)\n# Combine train and validation data\nX_train_overall = pd.concat([X_train, X_valid])\ny_train_overall = pd.concat([y_train, y_valid])\nrf.fit(X_train_overall, y_train_overall)\ny_pred_rf = rf.predict(X_test)\nnp.sqrt(mean_squared_error(y_test, y_pred_rf))\n\n61.69476644955032\napi = TabularRegressionTask()\n\n# Do an api search without any memory limit but use only MLPs\n\napi.search(\n X_train=X_train_overall,\n y_train=y_train_overall,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='r2',\n total_walltime_limit=80,\n func_eval_time_limit_secs=10,\n dataset_name=\"Diabetes\",\n memory_limit=None,\n enable_traditional_pipeline=True,\n )\n\n<autoPyTorch.api.tabular_regression.TabularRegressionTask at 0x1945be6d0>\ny_pred = api.predict(X_test)\nscore = api.score(y_pred, y_test)\nprint(score)\nnp.sqrt(mean_squared_error(y_test, y_pred))\n\n{'r2': 0.3026643977627368}\n\n\n60.33987680300709\n# Print statistics from search\nprint(api.sprint_statistics())\n\nautoPyTorch results:\n Dataset name: Diabetes\n Optimisation Metric: r2\n Best validation score: 0.4352600925944532\n Number of target algorithm runs: 13\n Number of successful target algorithm runs: 10\n Number of crashed target algorithm runs: 2\n Number of target algorithms that exceeded the time limit: 1\n Number of target algorithms that exceeded the memory limit: 0\napi.get_models_with_weights()[0]\n\n(0.62,\n MyTraditionalTabularRegressionPipeline(config='random_forest',\n dataset_properties={'categorical_columns': [],\n 'categories': [],\n 'input_shape': (10,),\n 'is_small_preprocess': True,\n 'issigned': True,\n 'issparse': False,\n 'numerical_columns': [0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9],\n 'output_shape': 1,\n 'output_type': 'continuous',\n 'target_type': 'tabular_regression',\n 'task_type': 'tabular_regression'},\n init_params={'instance': None},\n random_state=RandomState(MT19937) at 0x194150240))\napi.get_models_with_weights()[1]\n\n(0.18,\n ________________________________________\n TabularRegressionPipeline\n ________________________________________\n 0-) imputer: \n SimpleImputer\n \n 1-) variance_threshold: \n VarianceThreshold\n \n 2-) coalescer: \n NoCoalescer\n \n 3-) encoder: \n NoEncoder\n \n 4-) scaler: \n StandardScaler\n \n 5-) feature_preprocessor: \n NoFeaturePreprocessor\n \n 6-) tabular_transformer: \n TabularColumnTransformer\n \n 7-) preprocessing: \n EarlyPreprocessing\n \n 8-) network_embedding: \n autoPyTorch.pipeline NoEmbedding\n \n 9-) network_backbone: \n autoPyTorch.pipeline ShapedMLPBackbone\n \n 10-) network_head: \n autoPyTorch.pipeline FullyConnectedHead\n \n 11-) network: \n Sequential ({'random_state': RandomState(MT19937) at 0x19465E140, '_fit_requirements': [FitRequirement(name='network_head', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_backbone', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_embedding', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'device': device(type='cpu'), 'network': Sequential(\n (0): _NoEmbedding()\n (1): Sequential(\n (0): Linear(in_features=10, out_features=200, bias=True)\n (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU()\n (3): Linear(in_features=200, out_features=200, bias=True)\n (4): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (5): ReLU()\n (6): Linear(in_features=200, out_features=200, bias=True)\n (7): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (8): ReLU()\n (9): Linear(in_features=200, out_features=200, bias=True)\n (10): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (11): ReLU()\n (12): Linear(in_features=200, out_features=200, bias=True)\n )\n (2): Sequential(\n (0): Flatten(start_dim=1, end_dim=-1)\n (1): Linear(in_features=200, out_features=128, bias=True)\n (2): ReLU()\n (3): Linear(in_features=128, out_features=1, bias=True)\n )\n ), 'final_activation': None, 'is_fitted_': True})\n \n 12-) network_init: \n XavierInit\n \n 13-) optimizer: \n Adam ({'random_state': RandomState(MT19937) at 0x19465E140, '_fit_requirements': [FitRequirement(name='network', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'optimizer': Adam (\n Parameter Group 0\n amsgrad: False\n betas: (0.9, 0.9)\n capturable: False\n eps: 1e-08\n foreach: None\n lr: 0.01\n maximize: False\n weight_decay: 0.0\n ), 'lr': 0.01, 'beta1': 0.9, 'beta2': 0.9, 'weight_decay': 0.0})\n \n 14-) lr_scheduler: \n ReduceLROnPlateau\n \n 15-) data_loader: \n DataLoader\n \n 16-) trainer: \n autoPyTorch.pipeline Standard Trainer\n \n ________________________________________)\napi.get_models_with_weights()[2]\n\n(0.16,\n MyTraditionalTabularRegressionPipeline(config='svm',\n dataset_properties={'categorical_columns': [],\n 'categories': [],\n 'input_shape': (10,),\n 'is_small_preprocess': True,\n 'issigned': True,\n 'issparse': False,\n 'numerical_columns': [0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9],\n 'output_shape': 1,\n 'output_type': 'continuous',\n 'target_type': 'tabular_regression',\n 'task_type': 'tabular_regression'},\n init_params={'instance': None},\n random_state=RandomState(MT19937) at 0x194711B40))\nWhat if we fit only NNs?\napi2 = TabularRegressionTask(seed=2, ensemble_size=0)\n\napi2.search(\n X_train=X_train,\n y_train=y_train,\n X_test=X_test.copy(),\n y_test=y_test.copy(),\n optimize_metric='r2',\n total_walltime_limit=40,\n func_eval_time_limit_secs=10,\n dataset_name=\"Diabetes\",\n memory_limit=None,\n enable_traditional_pipeline=False,\n )\n\n[WARNING] [2023-02-27 18:29:06,260:Client-autoPyTorch.automl_common.common.utils.backend] Directory /var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/autoPyTorch_tmp_70d9fdb4-b69e-11ed-b5ea-3c7d0a00e5d9/.autoPyTorch/ensembles does not exist\n[ERROR] [2023-02-27 18:29:06,261:Client-AutoPyTorch:Diabetes:2] No valid ensemble was created. Please check the logfile for errors. Default to the best individual estimator:[(2, 2, 5.555555555555555)]\nNoneType: None\n\n\n<autoPyTorch.api.tabular_regression.TabularRegressionTask at 0x194d3f160>\ny_pred2 = api2.predict(X_test)\nscore2 = api2.score(y_pred2, y_test)\nscore2\n\n{'r2': -0.37656772470491995}\nnp.sqrt(mean_squared_error(y_test, y_pred2))\n\n84.77782906691597\nOkay, it seems we are worse than the random forest. Let’s see what happened.\napi2.get_models_with_weights()[0]\n\n(1.0,\n ________________________________________\n TabularRegressionPipeline\n ________________________________________\n 0-) imputer: \n SimpleImputer\n \n 1-) variance_threshold: \n VarianceThreshold\n \n 2-) coalescer: \n NoCoalescer\n \n 3-) encoder: \n NoEncoder\n \n 4-) scaler: \n StandardScaler\n \n 5-) feature_preprocessor: \n NoFeaturePreprocessor\n \n 6-) tabular_transformer: \n TabularColumnTransformer\n \n 7-) preprocessing: \n EarlyPreprocessing\n \n 8-) network_embedding: \n autoPyTorch.pipeline NoEmbedding\n \n 9-) network_backbone: \n autoPyTorch.pipeline ShapedMLPBackbone\n \n 10-) network_head: \n autoPyTorch.pipeline FullyConnectedHead\n \n 11-) network: \n Sequential ({'random_state': RandomState(MT19937) at 0x194711A40, '_fit_requirements': [FitRequirement(name='network_head', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_backbone', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False), FitRequirement(name='network_embedding', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'device': device(type='cpu'), 'network': Sequential(\n (0): _NoEmbedding()\n (1): Sequential(\n (0): Linear(in_features=10, out_features=200, bias=True)\n (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU()\n (3): Linear(in_features=200, out_features=200, bias=True)\n (4): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (5): ReLU()\n (6): Linear(in_features=200, out_features=200, bias=True)\n (7): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (8): ReLU()\n (9): Linear(in_features=200, out_features=200, bias=True)\n (10): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (11): ReLU()\n (12): Linear(in_features=200, out_features=200, bias=True)\n )\n (2): Sequential(\n (0): Flatten(start_dim=1, end_dim=-1)\n (1): Linear(in_features=200, out_features=128, bias=True)\n (2): ReLU()\n (3): Linear(in_features=128, out_features=1, bias=True)\n )\n ), 'final_activation': None, 'is_fitted_': True})\n \n 12-) network_init: \n XavierInit\n \n 13-) optimizer: \n Adam ({'random_state': RandomState(MT19937) at 0x194711A40, '_fit_requirements': [FitRequirement(name='network', supported_types=(<class 'torch.nn.modules.module.Module'>,), user_defined=False, dataset_property=False)], '_cs_updates': {}, 'optimizer': Adam (\n Parameter Group 0\n amsgrad: False\n betas: (0.9, 0.9)\n capturable: False\n eps: 1e-08\n foreach: None\n lr: 0.01\n maximize: False\n weight_decay: 0.0\n ), 'lr': 0.01, 'beta1': 0.9, 'beta2': 0.9, 'weight_decay': 0.0})\n \n 14-) lr_scheduler: \n ReduceLROnPlateau\n \n 15-) data_loader: \n DataLoader\n \n 16-) trainer: \n autoPyTorch.pipeline Standard Trainer\n \n ________________________________________)\nimport torch\nX_train_torch = torch.from_numpy(X_train.values).float()\ny_train_torch = torch.from_numpy(y_train.values).float()\nX_val_torch = torch.from_numpy(X_valid.values).float()\ny_val_torch = torch.from_numpy(y_valid.values).float()\nX_test_torch = torch.from_numpy(X_test.values).float()\ny_test_torch = torch.from_numpy(y_test.values).float()\n# Build a simple MLP in PyTorch, train on training data and optimize on validation data\n\n\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\n\nclass Net(nn.Module):\n def __init__(self, l1 = 64, l2 = 64, l3 = 64, l2_reg = 0.0):\n super(Net, self).__init__()\n self.l2_reg = l2_reg\n self.fc1 = nn.Linear(10, l1)\n self.fc2 = nn.Linear(l1, l2)\n self.fc3 = nn.Linear(l2, l3)\n self.fc4 = nn.Linear(l3, 1)\n\n def forward(self, x):\n # Add a residual connection\n x = F.relu(self.fc1(x))\n x = F.relu(self.fc2(x))\n x = F.relu(self.fc3(x))\n x = self.fc4(x)\n return x\n \n\nnet = Net()\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(net.parameters(), lr=0.001)\n\n\n\ntrain_losses = []\nval_losses = []\ntest_losses = []\n\n\nfor epoch in range(1000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_torch)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n loss.backward()\n optimizer.step()\n\n train_losses.append(np.sqrt(loss.item()))\n val_losses.append(np.sqrt(criterion(net(X_val_torch), y_val_torch).item()))\n test_losses.append(np.sqrt(criterion(net(X_test_torch), y_test_torch).item()))\n \nprint('Finished Training')\n\nFinished Training\nplt.plot(train_losses, label=\"Train\")\nplt.plot(test_losses, label=\"Test\")\nplt.plot(val_losses, label=\"Val\")\nplt.legend()\nwith torch.no_grad():\n outputs = net(X_test_torch)\n loss = criterion(outputs, y_test_torch)\n print(f\"Test loss: {np.sqrt(loss.item())}\")\n outputs = net(X_train_torch) \n loss = criterion(outputs, y_train_torch)\n print(f\"Train loss: {np.sqrt(loss.item())}\")\n print(\"\")\n\nTest loss: 72.43299031475574\nTrain loss: 79.2251754050993\n# Fit the NN model using scaled y values\n# Using sklearn's StandardScaler to scale the y values\n\nfrom sklearn.preprocessing import StandardScaler\n\nscaler = StandardScaler()\ny_train_scaled = scaler.fit_transform(y_train.values.reshape(-1, 1))\ny_train_scaled = torch.from_numpy(y_train_scaled).float()\ny_test_scaled = scaler.transform(y_test.values.reshape(-1, 1))\ny_test_scaled = torch.from_numpy(y_test_scaled).float()\ny_valid_scaled = scaler.transform(y_valid.values.reshape(-1, 1))\ny_valid_scaled = torch.from_numpy(y_valid_scaled).float()\n\nnet = Net()\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(net.parameters(), lr=0.01)\n\ntrain_losses = []\nval_losses = []\ntest_losses = []\n\n\nfor epoch in range(4000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n loss.backward()\n optimizer.step()\n\n train_losses.append(np.sqrt(loss.item()))\n\n \nprint('Finished Training')\n\n\nFinished Training\nplt.plot(train_losses, label=\"Train\")\n# Predict the scaled y values and inverse transform them to get the original y values\n\nwith torch.no_grad():\n outputs = net(X_test_torch)\n# Inverse transform the scaled y values to get the original y values\n\ny_pred = scaler.inverse_transform(outputs.numpy())\ny_pred = y_pred.reshape(-1)\nprint(f\"Test RMSE: {np.sqrt(mean_squared_error(y_test, y_pred))}\")\n\nTest RMSE: 82.79070717489064\npd.DataFrame({\"y_test\": y_test, \"y_pred\": y_pred}).plot.scatter(x=\"y_test\", y=\"y_pred\")\n# At this point it seems our model is worse than RF, Auto model containing Catboost etc. Let us optimize our NN\n# I will use Ray Tune to optimize the hyperparameters\n\nimport ray\nfrom ray import tune\nfrom ray.tune import CLIReporter\nfrom ray.tune.schedulers import ASHAScheduler\n\ndef train_mlp(config):\n net = Net(config[\"l1\"], config[\"l2\"], config[\"l3\"], config[\"l2_reg\"])\n \n criterion = nn.MSELoss()\n \n criterion = nn.MSELoss()\n optimizer = optim.Adam(net.parameters(), lr=config[\"lr\"])\n for epoch in range(1000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add l2 regularization to loss\n l2_reg = torch.tensor(0.)\n for param in net.parameters():\n l2_reg += torch.norm(param)\n loss += config[\"l2_reg\"] * l2_reg\n loss.backward()\n optimizer.step()\n with torch.no_grad():\n outputs = net(X_val_torch)\n # Inverse transform the scaled y values to get the original y values\n y_pred = scaler.inverse_transform(outputs.numpy())\n loss = np.sqrt(mean_squared_error(y_valid, y_pred))\n return {\"loss\": loss, \"status\": \"ok\"}\nray.shutdown()\n\nray.init()\nscheduler = ASHAScheduler(\n metric=\"loss\",\n mode=\"min\",\n max_t=100,\n grace_period=1,\n reduction_factor=2)\n\nreporter = CLIReporter(\n parameter_columns=[\"l1\", \"l2\", \"l3\", \"lr\", \"l2_reg\"],\n metric_columns=[\"loss\", \"training_iteration\"])\n\nanalysis = tune.run(\n train_mlp,\n resources_per_trial={\"cpu\": 1},\n config={\n \"l1\": tune.choice([2, 4, 16, 32, 64, 128, 256]),\n \"l2\": tune.choice([2, 4, 16, 32, 64, 128, 256]),\n \"l3\": tune.choice([2, 4, 16, 32, 64, 128, 256]),\n \"l2_reg\": tune.loguniform(1e-4, 1.0),\n \"lr\": tune.loguniform(1e-4, 1.0),\n },\n num_samples=50,\n scheduler=scheduler,\n progress_reporter=reporter)\n\n\n\n Trial Progress\n \n\n\n\nTrial name\ndate\ndone\nepisodes_total\nexperiment_id\nhostname\niterations_since_restore\nloss\nnode_ip\npid\nstatus\ntime_since_restore\ntime_this_iter_s\ntime_total_s\ntimestamp\ntimesteps_since_restore\ntimesteps_total\ntraining_iteration\ntrial_id\nwarmup_time\n\n\n\n\ntrain_mlp_c8d42_00000\n2023-02-28_10-08-51\nTrue\n\n684a45c7048544b384d756bd20117055\nNipuns-iMac-8.local\n1\n76.2275\n127.0.0.1\n99588\nok\n2.44738\n2.44738\n2.44738\n1677559131\n0\n\n1\nc8d42_00000\n0.00341606\n\n\ntrain_mlp_c8d42_00001\n2023-02-28_10-08-59\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n76.2183\n127.0.0.1\n99594\nok\n4.5987\n4.5987\n4.5987\n1677559139\n0\n\n1\nc8d42_00001\n0.00891805\n\n\ntrain_mlp_c8d42_00002\n2023-02-28_10-08-58\nTrue\n\nf1614c13b37a4966adb785c911a54c11\nNipuns-iMac-8.local\n1\n76.2261\n127.0.0.1\n99595\nok\n2.89705\n2.89705\n2.89705\n1677559138\n0\n\n1\nc8d42_00002\n0.0356119\n\n\ntrain_mlp_c8d42_00003\n2023-02-28_10-08-57\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n51.6545\n127.0.0.1\n99596\nok\n2.83387\n2.83387\n2.83387\n1677559137\n0\n\n1\nc8d42_00003\n0.00670218\n\n\ntrain_mlp_c8d42_00004\n2023-02-28_10-08-57\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n65.3017\n127.0.0.1\n99597\nok\n2.6755\n2.6755\n2.6755\n1677559137\n0\n\n1\nc8d42_00004\n0.00920701\n\n\ntrain_mlp_c8d42_00005\n2023-02-28_10-08-59\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n76.1747\n127.0.0.1\n99598\nok\n4.36245\n4.36245\n4.36245\n1677559139\n0\n\n1\nc8d42_00005\n0.00544381\n\n\ntrain_mlp_c8d42_00006\n2023-02-28_10-08-59\nTrue\n\n6b794ba131304c9e841b2d671a1a4693\nNipuns-iMac-8.local\n1\n76.2181\n127.0.0.1\n99601\nok\n4.51009\n4.51009\n4.51009\n1677559139\n0\n\n1\nc8d42_00006\n0.00939798\n\n\ntrain_mlp_c8d42_00007\n2023-02-28_10-09-06\nTrue\n\n38d637c01fcb43d3be8db783cb837ea2\nNipuns-iMac-8.local\n1\n76.7874\n127.0.0.1\n99602\nok\n11.1744\n11.1744\n11.1744\n1677559146\n0\n\n1\nc8d42_00007\n0.012325\n\n\ntrain_mlp_c8d42_00008\n2023-02-28_10-09-00\nTrue\n\na15c3445ce1e4ffeba2eabf9098820f2\nNipuns-iMac-8.local\n1\n76.1934\n127.0.0.1\n99604\nok\n4.98043\n4.98043\n4.98043\n1677559140\n0\n\n1\nc8d42_00008\n0.010926\n\n\ntrain_mlp_c8d42_00009\n2023-02-28_10-09-00\nTrue\n\n9de7e974434443f6a12e1dbc614a8582\nNipuns-iMac-8.local\n1\n68.2169\n127.0.0.1\n99612\nok\n5.00434\n5.00434\n5.00434\n1677559140\n0\n\n1\nc8d42_00009\n0.00496888\n\n\ntrain_mlp_c8d42_00010\n2023-02-28_10-09-00\nTrue\n\nd7fa3bf71c4d44dd90e2988c0e59687a\nNipuns-iMac-8.local\n1\n76.2198\n127.0.0.1\n99613\nok\n5.70828\n5.70828\n5.70828\n1677559140\n0\n\n1\nc8d42_00010\n0.00938773\n\n\ntrain_mlp_c8d42_00011\n2023-02-28_10-09-03\nTrue\n\n6e08564fbc6d4f5c9bea59429a196a5e\nNipuns-iMac-8.local\n1\n52.2075\n127.0.0.1\n99614\nok\n8.02228\n8.02228\n8.02228\n1677559143\n0\n\n1\nc8d42_00011\n0.00922298\n\n\ntrain_mlp_c8d42_00012\n2023-02-28_10-08-59\nTrue\n\n98349fab3a1b4027bc14d3ce239251fd\nNipuns-iMac-8.local\n1\n76.1125\n127.0.0.1\n99615\nok\n4.87773\n4.87773\n4.87773\n1677559139\n0\n\n1\nc8d42_00012\n0.00547719\n\n\ntrain_mlp_c8d42_00013\n2023-02-28_10-08-58\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n76.2163\n127.0.0.1\n99616\nok\n3.31126\n3.31126\n3.31126\n1677559138\n0\n\n1\nc8d42_00013\n0.0112782\n\n\ntrain_mlp_c8d42_00014\n2023-02-28_10-09-00\nTrue\n\ne4c8bef4a7e246c19960353c87f60541\nNipuns-iMac-8.local\n1\n76.2002\n127.0.0.1\n99617\nok\n5.44697\n5.44697\n5.44697\n1677559140\n0\n\n1\nc8d42_00014\n0.0142629\n\n\ntrain_mlp_c8d42_00015\n2023-02-28_10-09-01\nTrue\n\nf91e68c1ff9a486f9f74c54403e236f4\nNipuns-iMac-8.local\n1\n51.5808\n127.0.0.1\n99618\nok\n5.94224\n5.94224\n5.94224\n1677559141\n0\n\n1\nc8d42_00015\n0.00973487\n\n\ntrain_mlp_c8d42_00016\n2023-02-28_10-08-59\nTrue\n\n684a45c7048544b384d756bd20117055\nNipuns-iMac-8.local\n1\n85.4019\n127.0.0.1\n99588\nok\n4.64298\n4.64298\n4.64298\n1677559139\n0\n\n1\nc8d42_00016\n0.00341606\n\n\ntrain_mlp_c8d42_00017\n2023-02-28_10-09-01\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n76.2217\n127.0.0.1\n99597\nok\n3.26168\n3.26168\n3.26168\n1677559141\n0\n\n1\nc8d42_00017\n0.00920701\n\n\ntrain_mlp_c8d42_00018\n2023-02-28_10-09-00\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n52.4798\n127.0.0.1\n99596\nok\n2.94631\n2.94631\n2.94631\n1677559140\n0\n\n1\nc8d42_00018\n0.00670218\n\n\ntrain_mlp_c8d42_00019\n2023-02-28_10-09-10\nTrue\n\nf1614c13b37a4966adb785c911a54c11\nNipuns-iMac-8.local\n1\n51.5559\n127.0.0.1\n99595\nok\n12.1603\n12.1603\n12.1603\n1677559150\n0\n\n1\nc8d42_00019\n0.0356119\n\n\ntrain_mlp_c8d42_00020\n2023-02-28_10-09-04\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n54.8401\n127.0.0.1\n99616\nok\n5.67631\n5.67631\n5.67631\n1677559144\n0\n\n1\nc8d42_00020\n0.0112782\n\n\ntrain_mlp_c8d42_00021\n2023-02-28_10-09-03\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n80.9971\n127.0.0.1\n99598\nok\n4.02288\n4.02288\n4.02288\n1677559143\n0\n\n1\nc8d42_00021\n0.00544381\n\n\ntrain_mlp_c8d42_00022\n2023-02-28_10-09-04\nTrue\n\n6b794ba131304c9e841b2d671a1a4693\nNipuns-iMac-8.local\n1\n62.7729\n127.0.0.1\n99601\nok\n5.19184\n5.19184\n5.19184\n1677559144\n0\n\n1\nc8d42_00022\n0.00939798\n\n\ntrain_mlp_c8d42_00023\n2023-02-28_10-09-03\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n52.487\n127.0.0.1\n99594\nok\n3.83576\n3.83576\n3.83576\n1677559143\n0\n\n1\nc8d42_00023\n0.00891805\n\n\ntrain_mlp_c8d42_00024\n2023-02-28_10-09-09\nTrue\n\n684a45c7048544b384d756bd20117055\nNipuns-iMac-8.local\n1\n76.1853\n127.0.0.1\n99588\nok\n10.1193\n10.1193\n10.1193\n1677559149\n0\n\n1\nc8d42_00024\n0.00341606\n\n\ntrain_mlp_c8d42_00025\n2023-02-28_10-09-04\nTrue\n\n98349fab3a1b4027bc14d3ce239251fd\nNipuns-iMac-8.local\n1\n76.2173\n127.0.0.1\n99615\nok\n4.14031\n4.14031\n4.14031\n1677559144\n0\n\n1\nc8d42_00025\n0.00547719\n\n\ntrain_mlp_c8d42_00026\n2023-02-28_10-09-06\nTrue\n\na15c3445ce1e4ffeba2eabf9098820f2\nNipuns-iMac-8.local\n1\n68.3927\n127.0.0.1\n99604\nok\n6.18441\n6.18441\n6.18441\n1677559146\n0\n\n1\nc8d42_00026\n0.010926\n\n\ntrain_mlp_c8d42_00027\n2023-02-28_10-09-03\nTrue\n\n9de7e974434443f6a12e1dbc614a8582\nNipuns-iMac-8.local\n1\n76.218\n127.0.0.1\n99612\nok\n3.01754\n3.01754\n3.01754\n1677559143\n0\n\n1\nc8d42_00027\n0.00496888\n\n\ntrain_mlp_c8d42_00028\n2023-02-28_10-09-06\nTrue\n\ne4c8bef4a7e246c19960353c87f60541\nNipuns-iMac-8.local\n1\n66.4336\n127.0.0.1\n99617\nok\n5.79445\n5.79445\n5.79445\n1677559146\n0\n\n1\nc8d42_00028\n0.0142629\n\n\ntrain_mlp_c8d42_00029\n2023-02-28_10-09-03\nTrue\n\nd7fa3bf71c4d44dd90e2988c0e59687a\nNipuns-iMac-8.local\n1\n76.2164\n127.0.0.1\n99613\nok\n2.94911\n2.94911\n2.94911\n1677559143\n0\n\n1\nc8d42_00029\n0.00938773\n\n\ntrain_mlp_c8d42_00030\n2023-02-28_10-09-05\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n75.2123\n127.0.0.1\n99596\nok\n4.99429\n4.99429\n4.99429\n1677559145\n0\n\n1\nc8d42_00030\n0.00670218\n\n\ntrain_mlp_c8d42_00031\n2023-02-28_10-09-06\nTrue\n\nf91e68c1ff9a486f9f74c54403e236f4\nNipuns-iMac-8.local\n1\n51.925\n127.0.0.1\n99618\nok\n4.96918\n4.96918\n4.96918\n1677559146\n0\n\n1\nc8d42_00031\n0.00973487\n\n\ntrain_mlp_c8d42_00032\n2023-02-28_10-09-04\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n76.4117\n127.0.0.1\n99597\nok\n3.24968\n3.24968\n3.24968\n1677559144\n0\n\n1\nc8d42_00032\n0.00920701\n\n\ntrain_mlp_c8d42_00033\n2023-02-28_10-09-08\nTrue\n\n6e08564fbc6d4f5c9bea59429a196a5e\nNipuns-iMac-8.local\n1\n70.9285\n127.0.0.1\n99614\nok\n5.80603\n5.80603\n5.80603\n1677559148\n0\n\n1\nc8d42_00033\n0.00922298\n\n\ntrain_mlp_c8d42_00034\n2023-02-28_10-09-08\nTrue\n\n9de7e974434443f6a12e1dbc614a8582\nNipuns-iMac-8.local\n1\n51.9675\n127.0.0.1\n99612\nok\n5.20552\n5.20552\n5.20552\n1677559148\n0\n\n1\nc8d42_00034\n0.00496888\n\n\ntrain_mlp_c8d42_00035\n2023-02-28_10-09-07\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n76.1776\n127.0.0.1\n99598\nok\n3.86612\n3.86612\n3.86612\n1677559147\n0\n\n1\nc8d42_00035\n0.00544381\n\n\ntrain_mlp_c8d42_00036\n2023-02-28_10-09-07\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n76.2167\n127.0.0.1\n99594\nok\n3.56161\n3.56161\n3.56161\n1677559147\n0\n\n1\nc8d42_00036\n0.00891805\n\n\ntrain_mlp_c8d42_00037\n2023-02-28_10-09-07\nTrue\n\nd7fa3bf71c4d44dd90e2988c0e59687a\nNipuns-iMac-8.local\n1\n51.8721\n127.0.0.1\n99613\nok\n4.08222\n4.08222\n4.08222\n1677559147\n0\n\n1\nc8d42_00037\n0.00938773\n\n\ntrain_mlp_c8d42_00038\n2023-02-28_10-09-10\nTrue\n\n98349fab3a1b4027bc14d3ce239251fd\nNipuns-iMac-8.local\n1\n76.2166\n127.0.0.1\n99615\nok\n6.54892\n6.54892\n6.54892\n1677559150\n0\n\n1\nc8d42_00038\n0.00547719\n\n\ntrain_mlp_c8d42_00039\n2023-02-28_10-09-07\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n71.0616\n127.0.0.1\n99616\nok\n3.14067\n3.14067\n3.14067\n1677559147\n0\n\n1\nc8d42_00039\n0.0112782\n\n\ntrain_mlp_c8d42_00040\n2023-02-28_10-09-07\nTrue\n\n97bdec82d1ec44069bc84210c4a6a18e\nNipuns-iMac-8.local\n1\n78.2332\n127.0.0.1\n99597\nok\n3.1038\n3.1038\n3.1038\n1677559147\n0\n\n1\nc8d42_00040\n0.00920701\n\n\ntrain_mlp_c8d42_00041\n2023-02-28_10-09-09\nTrue\n\n6b794ba131304c9e841b2d671a1a4693\nNipuns-iMac-8.local\n1\n76.2681\n127.0.0.1\n99601\nok\n4.69843\n4.69843\n4.69843\n1677559149\n0\n\n1\nc8d42_00041\n0.00939798\n\n\ntrain_mlp_c8d42_00042\n2023-02-28_10-09-09\nTrue\n\ne9475ee5dd1f4d1289ef144d1a7879ad\nNipuns-iMac-8.local\n1\n64.8587\n127.0.0.1\n99596\nok\n3.2481\n3.2481\n3.2481\n1677559149\n0\n\n1\nc8d42_00042\n0.00670218\n\n\ntrain_mlp_c8d42_00043\n2023-02-28_10-09-09\nTrue\n\nf91e68c1ff9a486f9f74c54403e236f4\nNipuns-iMac-8.local\n1\n76.2166\n127.0.0.1\n99618\nok\n2.99345\n2.99345\n2.99345\n1677559149\n0\n\n1\nc8d42_00043\n0.00973487\n\n\ntrain_mlp_c8d42_00044\n2023-02-28_10-09-09\nTrue\n\n38d637c01fcb43d3be8db783cb837ea2\nNipuns-iMac-8.local\n1\n76.2244\n127.0.0.1\n99602\nok\n3.30911\n3.30911\n3.30911\n1677559149\n0\n\n1\nc8d42_00044\n0.012325\n\n\ntrain_mlp_c8d42_00045\n2023-02-28_10-09-10\nTrue\n\na15c3445ce1e4ffeba2eabf9098820f2\nNipuns-iMac-8.local\n1\n51.9337\n127.0.0.1\n99604\nok\n3.84642\n3.84642\n3.84642\n1677559150\n0\n\n1\nc8d42_00045\n0.010926\n\n\ntrain_mlp_c8d42_00046\n2023-02-28_10-09-10\nTrue\n\ne4c8bef4a7e246c19960353c87f60541\nNipuns-iMac-8.local\n1\n76.2167\n127.0.0.1\n99617\nok\n3.67529\n3.67529\n3.67529\n1677559150\n0\n\n1\nc8d42_00046\n0.0142629\n\n\ntrain_mlp_c8d42_00047\n2023-02-28_10-09-09\nTrue\n\n23a98dd3be3d4ad2abbc0a44970bd41b\nNipuns-iMac-8.local\n1\n73.6947\n127.0.0.1\n99594\nok\n2.20929\n2.20929\n2.20929\n1677559149\n0\n\n1\nc8d42_00047\n0.00891805\n\n\ntrain_mlp_c8d42_00048\n2023-02-28_10-09-09\nTrue\n\n6356d7d5564f49ac84f1f5221bccaa3e\nNipuns-iMac-8.local\n1\n76.2164\n127.0.0.1\n99616\nok\n2.29378\n2.29378\n2.29378\n1677559149\n0\n\n1\nc8d42_00048\n0.0112782\n\n\ntrain_mlp_c8d42_00049\n2023-02-28_10-09-09\nTrue\n\n3db458192e6944118049623bfe3128a4\nNipuns-iMac-8.local\n1\n60.3637\n127.0.0.1\n99598\nok\n2.2852\n2.2852\n2.2852\n1677559149\n0\n\n1\nc8d42_00049\n0.00544381\n\n\n\n\n\n\n\n\n2023-02-28 10:09:11,027 INFO tune.py:762 -- Total run time: 26.09 seconds (25.84 seconds for the tuning loop).\n# Print the table for all the trials\ndf = analysis.results_df\n\n# Sort the table by loss\ndf.sort_values(by=\"loss\", inplace=True)\n\n# Show only loss, config/l1, config/l2, config/l3, config/lr, training_iteration\ndf[[\"loss\", \"config/l1\", \"config/l2\", \"config/l3\", \"config/lr\", \"config/l2_reg\"]]\n\n\n\n\n\n\n\n\nloss\nconfig/l1\nconfig/l2\nconfig/l3\nconfig/lr\nconfig/l2_reg\n\n\ntrial_id\n\n\n\n\n\n\n\n\n\n\nc8d42_00019\n51.555930\n256\n256\n128\n0.000125\n0.027615\n\n\nc8d42_00015\n51.580817\n4\n64\n256\n0.003980\n0.015450\n\n\nc8d42_00003\n51.654479\n2\n64\n4\n0.000991\n0.015054\n\n\nc8d42_00037\n51.872128\n128\n32\n32\n0.000114\n0.005181\n\n\nc8d42_00031\n51.925027\n128\n64\n32\n0.000103\n0.001919\n\n\nc8d42_00045\n51.933658\n64\n256\n16\n0.000139\n0.000221\n\n\nc8d42_00034\n51.967532\n2\n128\n128\n0.000371\n0.000860\n\n\nc8d42_00011\n52.207469\n16\n256\n128\n0.000134\n0.001002\n\n\nc8d42_00018\n52.479806\n2\n16\n4\n0.002317\n0.000166\n\n\nc8d42_00023\n52.486995\n64\n64\n16\n0.000214\n0.025084\n\n\nc8d42_00020\n54.840118\n2\n128\n128\n0.001178\n0.001264\n\n\nc8d42_00049\n60.363672\n4\n128\n16\n0.003238\n0.000639\n\n\nc8d42_00022\n62.772853\n256\n4\n64\n0.000419\n0.000484\n\n\nc8d42_00042\n64.858708\n128\n32\n2\n0.001164\n0.000313\n\n\nc8d42_00004\n65.301650\n4\n2\n32\n0.071916\n0.000168\n\n\nc8d42_00028\n66.433568\n128\n64\n128\n0.013376\n0.002368\n\n\nc8d42_00009\n68.216911\n32\n256\n2\n0.000931\n0.001888\n\n\nc8d42_00026\n68.392660\n2\n256\n64\n0.025460\n0.000803\n\n\nc8d42_00033\n70.928476\n2\n256\n64\n0.016914\n0.000214\n\n\nc8d42_00039\n71.061583\n16\n4\n64\n0.007347\n0.000135\n\n\nc8d42_00047\n73.694689\n4\n2\n32\n0.000104\n0.001308\n\n\nc8d42_00030\n75.212274\n16\n256\n2\n0.004354\n0.000746\n\n\nc8d42_00012\n76.112538\n128\n128\n4\n0.226784\n0.140003\n\n\nc8d42_00005\n76.174661\n4\n4\n256\n0.086190\n0.563438\n\n\nc8d42_00035\n76.177571\n64\n4\n128\n0.166371\n0.330122\n\n\nc8d42_00024\n76.185294\n32\n256\n256\n0.159865\n0.671139\n\n\nc8d42_00008\n76.193428\n256\n32\n16\n0.113514\n0.062898\n\n\nc8d42_00014\n76.200245\n32\n128\n128\n0.561836\n0.000226\n\n\nc8d42_00013\n76.216345\n64\n16\n64\n0.000173\n0.194086\n\n\nc8d42_00048\n76.216390\n64\n16\n64\n0.301651\n0.000141\n\n\nc8d42_00029\n76.216436\n16\n2\n2\n0.085926\n0.000206\n\n\nc8d42_00038\n76.216618\n16\n256\n256\n0.005644\n0.210313\n\n\nc8d42_00043\n76.216619\n64\n16\n64\n0.000284\n0.433444\n\n\nc8d42_00046\n76.216670\n256\n64\n2\n0.000617\n0.181541\n\n\nc8d42_00036\n76.216674\n2\n64\n64\n0.680740\n0.005030\n\n\nc8d42_00025\n76.217288\n16\n128\n16\n0.002700\n0.051269\n\n\nc8d42_00027\n76.218050\n2\n4\n32\n0.001925\n0.194493\n\n\nc8d42_00006\n76.218069\n256\n2\n64\n0.003326\n0.233648\n\n\nc8d42_00001\n76.218267\n2\n16\n256\n0.122404\n0.001683\n\n\nc8d42_00010\n76.219815\n128\n128\n64\n0.006478\n0.178694\n\n\nc8d42_00017\n76.221708\n32\n32\n16\n0.004100\n0.520138\n\n\nc8d42_00044\n76.224417\n256\n2\n2\n0.019401\n0.077370\n\n\nc8d42_00002\n76.226116\n16\n2\n64\n0.000326\n0.028026\n\n\nc8d42_00000\n76.227517\n4\n16\n128\n0.020000\n0.307903\n\n\nc8d42_00041\n76.268058\n256\n32\n64\n0.045152\n0.252533\n\n\nc8d42_00032\n76.411740\n32\n32\n2\n0.541939\n0.193151\n\n\nc8d42_00007\n76.787387\n256\n256\n2\n0.616285\n0.114236\n\n\nc8d42_00040\n78.233233\n64\n4\n16\n0.164726\n0.000119\n\n\nc8d42_00021\n80.997064\n128\n16\n32\n0.001292\n0.002832\n\n\nc8d42_00016\n85.401864\n32\n16\n256\n0.055791\n0.000965\n# Print the best hyperparameters\n\nanalysis.get_best_config(metric=\"loss\", mode=\"min\")\n\n{'l1': 256,\n 'l2': 256,\n 'l3': 128,\n 'l2_reg': 0.027614886800457164,\n 'lr': 0.00012453571993239395}\n# Perform the final test on the test set\n\nbest_config = analysis.get_best_config(metric=\"loss\", mode=\"min\")\n\n# Use the best hyperparameters to train the model\nnet = Net(best_config[\"l1\"], best_config[\"l2\"], best_config[\"l3\"], best_config[\"l2_reg\"])\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(net.parameters(), lr=best_config[\"lr\"])\nfor epoch in range(1000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n\n \n loss.backward()\n optimizer.step()\n\nwith torch.no_grad():\n outputs = net(X_test_torch)\n # Inverse transform the scaled y values to get the original y values\n y_pred = scaler.inverse_transform(outputs.numpy())\n loss = np.sqrt(mean_squared_error(y_test, y_pred))\n print(f\"Test RMSE: {loss}\")\n print(\"\")\n \n\nTest RMSE: 54.06271747689579\npd.DataFrame({\"y_test\": y_test, \"y_pred\": y_pred.reshape(-1)})\n\n\n\n\n\n\n\n\ny_test\ny_pred\n\n\n\n\n246\n78.0\n107.765800\n\n\n425\n152.0\n110.693916\n\n\n293\n200.0\n175.535324\n\n\n31\n59.0\n82.328522\n\n\n359\n311.0\n176.440598\n\n\n...\n...\n...\n\n\n117\n281.0\n246.310699\n\n\n139\n281.0\n245.451309\n\n\n218\n214.0\n118.843346\n\n\n93\n96.0\n77.727440\n\n\n420\n146.0\n134.773529\n\n\n\n\n111 rows × 2 columns\n# Thus far it seems even with hyperparameter tuning we are unable to match the performance of ensemble models. \n\n# Get the top 5 configurations\n\ndf[['loss', 'config/l1', 'config/l2', 'config/l3', 'config/lr', 'config/l2_reg']].head(5)\n\n\n\n\n\n\n\n\nloss\nconfig/l1\nconfig/l2\nconfig/l3\nconfig/lr\nconfig/l2_reg\n\n\ntrial_id\n\n\n\n\n\n\n\n\n\n\nc8d42_00019\n51.555930\n256\n256\n128\n0.000125\n0.027615\n\n\nc8d42_00015\n51.580817\n4\n64\n256\n0.003980\n0.015450\n\n\nc8d42_00003\n51.654479\n2\n64\n4\n0.000991\n0.015054\n\n\nc8d42_00037\n51.872128\n128\n32\n32\n0.000114\n0.005181\n\n\nc8d42_00031\n51.925027\n128\n64\n32\n0.000103\n0.001919\nconfig_list_of_dicts = df[['config/l1', 'config/l2', 'config/l3', 'config/lr', 'config/l2_reg']].head(5).to_dict('records')\n\n# Train an ensemble of 5 models using the top 5 configurations\n\nensemble = []\nfor config in config_list_of_dicts:\n net = Net(config[\"config/l1\"], config[\"config/l2\"], config[\"config/l3\"], config[\"config/l2_reg\"])\n criterion = nn.MSELoss()\n optimizer = optim.Adam(net.parameters(), lr=config[\"config/lr\"])\n for epoch in range(2000): # loop over the dataset multiple times and store the train, test loss\n optimizer.zero_grad()\n outputs = net(X_train_torch)\n loss = criterion(outputs, y_train_scaled)\n # Add L2 regularization\n for param in net.parameters():\n loss += net.l2_reg * torch.norm(param)\n loss.backward()\n optimizer.step()\n ensemble.append(net)\n# Get the predictions from the ensemble\n\nensemble_preds = []\nfor net in ensemble:\n with torch.no_grad():\n outputs = net(X_test_torch)\n # Scale the predictions back to the original scale\n outputs = scaler.inverse_transform(outputs.numpy())\n ensemble_preds.append(outputs)\n \nensemble_preds = np.array(ensemble_preds)\n\n# Get the mean of the predictions\n\nensemble_preds_mean = ensemble_preds.mean(axis=0)\n\n# Get the RMSE of the ensemble\n\ncriterion = nn.MSELoss()\nloss = criterion(torch.tensor(ensemble_preds_mean), y_test_torch)\nprint(f\"Test loss: {np.sqrt(loss.item())}\")\n\nTest loss: 83.04458927945727" }, { "objectID": "posts/auto-pytorch.html#inputs-to-meta-learning-model", @@ -627,7 +634,7 @@ "href": "posts/auto-pytorch.html#decoder-outputs", "title": "AutoML PyTorch", "section": "### Decoder outputs", - "text": "### Decoder outputs\n\nDecoding: Decoder is a neural network that takes in the query vector and encoder summary and outputs a vector of shape (Q, 1)\n\n\nD = X_train_torch.shape[1]\n\n\n# Building the encoder \n# Takes as input the number of features, the number of nodes in 3 hidden layers, and the L2 regularization parameter\n\nclass Encoder(nn.Module):\n def __init__(self, n_features = D+1, l1_size=32, l2_size=32, E = 32, l2_reg=0.0):\n super(Encoder, self).__init__()\n self.l1 = nn.Linear(n_features, l1_size)\n self.l2 = nn.Linear(l1_size, l2_size)\n self.l3 = nn.Linear(l2_size, E)\n self.l2_reg = l2_reg\n self.relu = nn.ReLU()\n \n def forward(self, x):\n x = self.relu(self.l1(x))\n x = self.relu(self.l2(x))\n x = self.relu(self.l3(x))\n return x\n\n\n# Create C context vectors and pass them through the encoder\n\nC = 5\nQ = 2\nencoder = Encoder(E=32)\n\n# Create the context vectors input\nC_idx = np.random.choice(X_train_torch.shape[0], C, replace=False)\nC_idx = torch.tensor(C_idx)\n\n# Get the Q query vectors\nall_idx = np.arange(X_train_torch.shape[0])\nQ_idx = np.random.choice(np.setdiff1d(all_idx, C_idx), Q, replace=False)\nQ_idx = torch.tensor(Q_idx)\n\nC_idx, Q_idx\n\n(tensor([108, 26, 25, 210, 151]), tensor([156, 230]))\n\n\n\nX_train_torch[C_idx], y_train_scaled[C_idx], y_train_torch[C_idx]\n\n(tensor([[-0.0019, -0.0446, 0.0542, -0.0665, 0.0727, 0.0566, -0.0434, 0.0849,\n 0.0845, 0.0486],\n [ 0.0817, 0.0507, 0.0013, 0.0356, 0.1264, 0.0911, 0.0192, 0.0343,\n 0.0845, -0.0301],\n [ 0.0126, -0.0446, 0.0067, -0.0562, -0.0759, -0.0664, -0.0213, -0.0376,\n -0.0181, -0.0922],\n [ 0.0272, -0.0446, 0.0067, 0.0356, 0.0796, 0.0707, 0.0155, 0.0343,\n 0.0407, 0.0113],\n [-0.0709, 0.0507, -0.0752, -0.0401, -0.0511, -0.0151, -0.0397, -0.0026,\n -0.0964, -0.0342]]),\n tensor([[ 0.4966],\n [ 0.5471],\n [ 0.3704],\n [-1.0812],\n [-1.2327]]),\n tensor([192., 196., 182., 67., 55.]))\n\n\n\ncontext_input = torch.cat((X_train_torch[C_idx], y_train_scaled[C_idx].reshape(-1, 1)), axis=1)\npd.DataFrame(context_input)\n\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n\n\n\n\n0\n-0.001882\n-0.044642\n0.054152\n-0.066495\n0.072732\n0.056619\n-0.043401\n0.084863\n0.084495\n0.048628\n0.496595\n\n\n1\n0.081666\n0.050680\n0.001339\n0.035644\n0.126395\n0.091065\n0.019187\n0.034309\n0.084495\n-0.030072\n0.547084\n\n\n2\n0.012648\n-0.044642\n0.006728\n-0.056166\n-0.075870\n-0.066449\n-0.021311\n-0.037648\n-0.018118\n-0.092204\n0.370372\n\n\n3\n0.027178\n-0.044642\n0.006728\n0.035644\n0.079612\n0.070710\n0.015505\n0.034309\n0.040672\n0.011349\n-1.081190\n\n\n4\n-0.070900\n0.050680\n-0.075186\n-0.040099\n-0.051103\n-0.015092\n-0.039719\n-0.002592\n-0.096433\n-0.034215\n-1.232658\n\n\n\n\n\n\n\n\n\nencoder(context_input).shape\n\ntorch.Size([5, 32])\n\n\n\nencoder(context_input).mean(axis=0).shape\n\ntorch.Size([32])\n\n\n\n# Now we will build the decoder\n\nclass Decoder(nn.Module):\n def __init__(self, E=32, l1_size=32, l2_size=32, l2_reg=0.0):\n super(Decoder, self).__init__()\n self.l1 = nn.Linear(E, l1_size)\n self.l2 = nn.Linear(l1_size, l2_size)\n self.l3 = nn.Linear(l2_size, 1)\n self.l2_reg = l2_reg\n self.relu = nn.ReLU()\n \n def forward(self, x):\n x = self.relu(self.l1(x))\n x = self.relu(self.l2(x))\n x = self.relu(self.l3(x))\n return x\n\n\n# Pass Q query vectors and the context vectors through the decoder\n\ndecoder = Decoder(E =32 + D)\nmean_context = encoder(context_input).mean(axis=0)\n# Repeat the mean context vector Q times\nmean_context = mean_context.repeat(Q, 1)\nmean_context.shape\n\ntorch.Size([2, 32])\n\n\n\nquery = X_train_torch[Q_idx]\nquery.shape\n\ntorch.Size([2, 10])\n\n\n\ninput_decoder = torch.cat((query, mean_context), axis=1)\ninput_decoder.shape\n\ntorch.Size([2, 42])\n\n\n\ndecoder(input_decoder).shape\n\ntorch.Size([2, 1])\n\n\n\n# Building a meta learning class\n\nclass Meta(nn.Module):\n def __init__(self, D = X_train_torch.shape[1], E = 32, C = 10, Q = 2, l1_size=32, l2_size=32, l2_reg=0.0):\n super(Meta, self).__init__()\n self.encoder = Encoder(D + 1, l1_size, l2_size, E, l2_reg)\n self.decoder = Decoder(E + D, l1_size, l2_size, l2_reg)\n self.D = D\n self.E = E\n self.C = C\n self.Q = Q\n self.l2_reg = l2_reg\n\n def forward(self, context_input, query):\n mean_context = self.encoder(context_input).mean(axis=0)\n # Repeat the mean context vector Q times\n mean_context = mean_context.repeat(self.Q, 1)\n input_decoder = torch.cat((query, mean_context), axis=1)\n return self.decoder(input_decoder)\n\n\nm = Meta()\nm(context_input, query)\n\ntensor([[0.],\n [0.]], grad_fn=<ReluBackward0>)\n\n\n\n# Create a function to get the context and query vectors\n\ndef get_context_query(X, y, C, Q):\n C_idx = np.random.choice(X.shape[0], C, replace=False)\n C_idx = torch.tensor(C_idx)\n all_idx = np.arange(X.shape[0])\n Q_idx = np.random.choice(np.setdiff1d(all_idx, C_idx), Q, replace=False)\n Q_idx = torch.tensor(Q_idx)\n context_input = torch.cat((X[C_idx], y[C_idx].reshape(-1, 1)), axis=1)\n query = X[Q_idx]\n return context_input, query, y[Q_idx]\n\n\nget_context_query(X_train_torch, y_train_torch, 10, 2)\n\n(tensor([[-6.0003e-02, -4.4642e-02, 1.3387e-03, -2.9771e-02, -7.0728e-03,\n -2.1669e-02, 1.1824e-02, -2.5923e-03, 3.1815e-02, -5.4925e-02,\n 2.5800e+02],\n [ 3.0811e-02, -4.4642e-02, -5.0396e-02, -2.2277e-03, -4.4223e-02,\n -8.9935e-02, 1.1859e-01, -7.6395e-02, -1.8118e-02, 3.0644e-03,\n 8.7000e+01],\n [ 6.7136e-02, -4.4642e-02, 3.4944e-03, 3.5644e-02, 4.9341e-02,\n 3.1254e-02, 7.0730e-02, -3.9493e-02, -6.0925e-04, 1.9633e-02,\n 7.3000e+01],\n [ 1.2648e-02, -4.4642e-02, 6.7278e-03, -5.6166e-02, -7.5870e-02,\n -6.6449e-02, -2.1311e-02, -3.7648e-02, -1.8118e-02, -9.2204e-02,\n 1.8200e+02],\n [-6.3635e-02, 5.0680e-02, -7.9497e-02, -5.6706e-03, -7.1743e-02,\n -6.6449e-02, -1.0266e-02, -3.9493e-02, -1.8118e-02, -5.4925e-02,\n 1.0100e+02],\n [-1.8820e-03, -4.4642e-02, -6.6563e-02, 1.2151e-03, -2.9449e-03,\n 3.0702e-03, 1.1824e-02, -2.5923e-03, -2.0289e-02, -2.5930e-02,\n 7.9000e+01],\n [-5.6370e-02, -4.4642e-02, -7.4108e-02, -5.0428e-02, -2.4960e-02,\n -4.7034e-02, 9.2820e-02, -7.6395e-02, -6.1177e-02, -4.6641e-02,\n 4.8000e+01],\n [-7.0900e-02, 5.0680e-02, -7.5186e-02, -4.0099e-02, -5.1103e-02,\n -1.5092e-02, -3.9719e-02, -2.5923e-03, -9.6433e-02, -3.4215e-02,\n 5.5000e+01],\n [ 3.8076e-02, 5.0680e-02, -2.9918e-02, -4.0099e-02, -3.3216e-02,\n -2.4174e-02, -1.0266e-02, -2.5923e-03, -1.2908e-02, 3.0644e-03,\n 1.6000e+02],\n [ 3.4443e-02, 5.0680e-02, 1.2529e-01, 2.8758e-02, -5.3855e-02,\n -1.2900e-02, -1.0231e-01, 1.0811e-01, 2.7149e-04, 2.7917e-02,\n 3.4100e+02]]),\n tensor([[ 0.0163, -0.0446, 0.0175, -0.0229, 0.0603, 0.0444, 0.0302, -0.0026,\n 0.0372, -0.0011],\n [-0.0418, -0.0446, 0.0477, 0.0597, 0.1278, 0.1280, -0.0250, 0.1081,\n 0.0639, 0.0403]]),\n tensor([128., 258.]))\n\n\n\n# Training loop\n\ntrain_loss = []\nval_losses = []\ntest_losses = []\nfor i in range(2000):\n context_input, query, y_query = get_context_query(X_train_torch, y_train_scaled, 10, 2)\n y_pred = m(context_input, query)\n loss = criterion(y_pred, y_query)\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n train_loss.append(loss.item())\n if i % 100 == 0:\n print(f\"Epoch {i} - Train loss: {loss.item()}\")\n\n\nEpoch 0 - Train loss: 0.2789093554019928\nEpoch 100 - Train loss: 1.1788500547409058\nEpoch 200 - Train loss: 0.6084457039833069\nEpoch 300 - Train loss: 0.6604040265083313\nEpoch 400 - Train loss: 0.6397659778594971\nEpoch 500 - Train loss: 0.18253237009048462\nEpoch 600 - Train loss: 0.4401477575302124\nEpoch 700 - Train loss: 0.19387057423591614\nEpoch 800 - Train loss: 1.3166064023971558\nEpoch 900 - Train loss: 0.4808962345123291\nEpoch 1000 - Train loss: 0.2843365967273712\nEpoch 1100 - Train loss: 0.35177987813949585\nEpoch 1200 - Train loss: 1.1539983749389648\nEpoch 1300 - Train loss: 2.0983378887176514\nEpoch 1400 - Train loss: 3.325526237487793\nEpoch 1500 - Train loss: 0.38105207681655884\nEpoch 1600 - Train loss: 0.16427072882652283\nEpoch 1700 - Train loss: 1.9909170866012573\nEpoch 1800 - Train loss: 0.03792643919587135\nEpoch 1900 - Train loss: 2.758319854736328\n\n\n\nplt.plot(train_loss)\n\n\n\n\n\n\n\n\n\ncontext_input, _, _ = get_context_query(X_train_torch, y_train_torch, 10, query.shape[0])\ncontext_input.shape\n\ntorch.Size([10, 11])\n\n\n\nX_test_torch.shape\n\ntorch.Size([111, 10])\n\n\n\nquery.shape\n\ntorch.Size([2, 10])\n\n\n\ncontext_input.shape\n\ntorch.Size([10, 11])\n\n\n\n# Use whole of the test set as the query set\nquery = X_test_torch\ny_query = y_test_torch\n\n# get context from the training set\ncontext_input, _, _ = get_context_query(X_train_torch, y_train_scaled, 10, query.shape[0])\n\n\nm = Meta(C=10, Q=query.shape[0])\n\ny_pred = m(context_input, query)\n\n# Use inverse transform to get the original values\n\ny_pred = scaler.inverse_transform(y_pred.detach().numpy())\nprint(f\"Test loss: {np.sqrt(mean_squared_error(y_query, y_pred))}\")\n\nTest loss: 75.22261810302734\n\n\n\npd.DataFrame({\"y_pred\": y_pred.reshape(-1), \"y_query\": y_query})\n\n\n\n\n\n\n\n\n\ny_pred\ny_query\n\n\n\n\n0\n169.061569\n78.0\n\n\n1\n168.179596\n152.0\n\n\n2\n168.527161\n200.0\n\n\n3\n168.452423\n59.0\n\n\n4\n168.826492\n311.0\n\n\n...\n...\n...\n\n\n106\n169.100494\n281.0\n\n\n107\n168.457611\n281.0\n\n\n108\n169.019516\n214.0\n\n\n109\n168.639725\n96.0\n\n\n110\n169.026901\n146.0\n\n\n\n\n111 rows × 2 columns" + "text": "### Decoder outputs\n\nDecoding: Decoder is a neural network that takes in the query vector and encoder summary and outputs a vector of shape (Q, 1)\n\n\nD = X_train_torch.shape[1]\n\n\n# Building the encoder \n# Takes as input the number of features, the number of nodes in 3 hidden layers, and the L2 regularization parameter\n\nclass Encoder(nn.Module):\n def __init__(self, n_features = D+1, l1_size=32, l2_size=32, E = 32, l2_reg=0.0):\n super(Encoder, self).__init__()\n self.l1 = nn.Linear(n_features, l1_size)\n self.l2 = nn.Linear(l1_size, l2_size)\n self.l3 = nn.Linear(l2_size, E)\n self.l2_reg = l2_reg\n self.relu = nn.ReLU()\n \n def forward(self, x):\n x = self.relu(self.l1(x))\n x = self.relu(self.l2(x))\n x = self.relu(self.l3(x))\n return x\n\n\n# Create C context vectors and pass them through the encoder\n\nC = 5\nQ = 2\nencoder = Encoder(E=32)\n\n# Create the context vectors input\nC_idx = np.random.choice(X_train_torch.shape[0], C, replace=False)\nC_idx = torch.tensor(C_idx)\n\n# Get the Q query vectors\nall_idx = np.arange(X_train_torch.shape[0])\nQ_idx = np.random.choice(np.setdiff1d(all_idx, C_idx), Q, replace=False)\nQ_idx = torch.tensor(Q_idx)\n\nC_idx, Q_idx\n\n(tensor([108, 26, 25, 210, 151]), tensor([156, 230]))\n\n\n\nX_train_torch[C_idx], y_train_scaled[C_idx], y_train_torch[C_idx]\n\n(tensor([[-0.0019, -0.0446, 0.0542, -0.0665, 0.0727, 0.0566, -0.0434, 0.0849,\n 0.0845, 0.0486],\n [ 0.0817, 0.0507, 0.0013, 0.0356, 0.1264, 0.0911, 0.0192, 0.0343,\n 0.0845, -0.0301],\n [ 0.0126, -0.0446, 0.0067, -0.0562, -0.0759, -0.0664, -0.0213, -0.0376,\n -0.0181, -0.0922],\n [ 0.0272, -0.0446, 0.0067, 0.0356, 0.0796, 0.0707, 0.0155, 0.0343,\n 0.0407, 0.0113],\n [-0.0709, 0.0507, -0.0752, -0.0401, -0.0511, -0.0151, -0.0397, -0.0026,\n -0.0964, -0.0342]]),\n tensor([[ 0.4966],\n [ 0.5471],\n [ 0.3704],\n [-1.0812],\n [-1.2327]]),\n tensor([192., 196., 182., 67., 55.]))\n\n\n\ncontext_input = torch.cat((X_train_torch[C_idx], y_train_scaled[C_idx].reshape(-1, 1)), axis=1)\npd.DataFrame(context_input)\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n\n\n\n\n0\n-0.001882\n-0.044642\n0.054152\n-0.066495\n0.072732\n0.056619\n-0.043401\n0.084863\n0.084495\n0.048628\n0.496595\n\n\n1\n0.081666\n0.050680\n0.001339\n0.035644\n0.126395\n0.091065\n0.019187\n0.034309\n0.084495\n-0.030072\n0.547084\n\n\n2\n0.012648\n-0.044642\n0.006728\n-0.056166\n-0.075870\n-0.066449\n-0.021311\n-0.037648\n-0.018118\n-0.092204\n0.370372\n\n\n3\n0.027178\n-0.044642\n0.006728\n0.035644\n0.079612\n0.070710\n0.015505\n0.034309\n0.040672\n0.011349\n-1.081190\n\n\n4\n-0.070900\n0.050680\n-0.075186\n-0.040099\n-0.051103\n-0.015092\n-0.039719\n-0.002592\n-0.096433\n-0.034215\n-1.232658\n\n\n\n\n\n\n\n\nencoder(context_input).shape\n\ntorch.Size([5, 32])\n\n\n\nencoder(context_input).mean(axis=0).shape\n\ntorch.Size([32])\n\n\n\n# Now we will build the decoder\n\nclass Decoder(nn.Module):\n def __init__(self, E=32, l1_size=32, l2_size=32, l2_reg=0.0):\n super(Decoder, self).__init__()\n self.l1 = nn.Linear(E, l1_size)\n self.l2 = nn.Linear(l1_size, l2_size)\n self.l3 = nn.Linear(l2_size, 1)\n self.l2_reg = l2_reg\n self.relu = nn.ReLU()\n \n def forward(self, x):\n x = self.relu(self.l1(x))\n x = self.relu(self.l2(x))\n x = self.relu(self.l3(x))\n return x\n\n\n# Pass Q query vectors and the context vectors through the decoder\n\ndecoder = Decoder(E =32 + D)\nmean_context = encoder(context_input).mean(axis=0)\n# Repeat the mean context vector Q times\nmean_context = mean_context.repeat(Q, 1)\nmean_context.shape\n\ntorch.Size([2, 32])\n\n\n\nquery = X_train_torch[Q_idx]\nquery.shape\n\ntorch.Size([2, 10])\n\n\n\ninput_decoder = torch.cat((query, mean_context), axis=1)\ninput_decoder.shape\n\ntorch.Size([2, 42])\n\n\n\ndecoder(input_decoder).shape\n\ntorch.Size([2, 1])\n\n\n\n# Building a meta learning class\n\nclass Meta(nn.Module):\n def __init__(self, D = X_train_torch.shape[1], E = 32, C = 10, Q = 2, l1_size=32, l2_size=32, l2_reg=0.0):\n super(Meta, self).__init__()\n self.encoder = Encoder(D + 1, l1_size, l2_size, E, l2_reg)\n self.decoder = Decoder(E + D, l1_size, l2_size, l2_reg)\n self.D = D\n self.E = E\n self.C = C\n self.Q = Q\n self.l2_reg = l2_reg\n\n def forward(self, context_input, query):\n mean_context = self.encoder(context_input).mean(axis=0)\n # Repeat the mean context vector Q times\n mean_context = mean_context.repeat(self.Q, 1)\n input_decoder = torch.cat((query, mean_context), axis=1)\n return self.decoder(input_decoder)\n\n\nm = Meta()\nm(context_input, query)\n\ntensor([[0.],\n [0.]], grad_fn=<ReluBackward0>)\n\n\n\n# Create a function to get the context and query vectors\n\ndef get_context_query(X, y, C, Q):\n C_idx = np.random.choice(X.shape[0], C, replace=False)\n C_idx = torch.tensor(C_idx)\n all_idx = np.arange(X.shape[0])\n Q_idx = np.random.choice(np.setdiff1d(all_idx, C_idx), Q, replace=False)\n Q_idx = torch.tensor(Q_idx)\n context_input = torch.cat((X[C_idx], y[C_idx].reshape(-1, 1)), axis=1)\n query = X[Q_idx]\n return context_input, query, y[Q_idx]\n\n\nget_context_query(X_train_torch, y_train_torch, 10, 2)\n\n(tensor([[-6.0003e-02, -4.4642e-02, 1.3387e-03, -2.9771e-02, -7.0728e-03,\n -2.1669e-02, 1.1824e-02, -2.5923e-03, 3.1815e-02, -5.4925e-02,\n 2.5800e+02],\n [ 3.0811e-02, -4.4642e-02, -5.0396e-02, -2.2277e-03, -4.4223e-02,\n -8.9935e-02, 1.1859e-01, -7.6395e-02, -1.8118e-02, 3.0644e-03,\n 8.7000e+01],\n [ 6.7136e-02, -4.4642e-02, 3.4944e-03, 3.5644e-02, 4.9341e-02,\n 3.1254e-02, 7.0730e-02, -3.9493e-02, -6.0925e-04, 1.9633e-02,\n 7.3000e+01],\n [ 1.2648e-02, -4.4642e-02, 6.7278e-03, -5.6166e-02, -7.5870e-02,\n -6.6449e-02, -2.1311e-02, -3.7648e-02, -1.8118e-02, -9.2204e-02,\n 1.8200e+02],\n [-6.3635e-02, 5.0680e-02, -7.9497e-02, -5.6706e-03, -7.1743e-02,\n -6.6449e-02, -1.0266e-02, -3.9493e-02, -1.8118e-02, -5.4925e-02,\n 1.0100e+02],\n [-1.8820e-03, -4.4642e-02, -6.6563e-02, 1.2151e-03, -2.9449e-03,\n 3.0702e-03, 1.1824e-02, -2.5923e-03, -2.0289e-02, -2.5930e-02,\n 7.9000e+01],\n [-5.6370e-02, -4.4642e-02, -7.4108e-02, -5.0428e-02, -2.4960e-02,\n -4.7034e-02, 9.2820e-02, -7.6395e-02, -6.1177e-02, -4.6641e-02,\n 4.8000e+01],\n [-7.0900e-02, 5.0680e-02, -7.5186e-02, -4.0099e-02, -5.1103e-02,\n -1.5092e-02, -3.9719e-02, -2.5923e-03, -9.6433e-02, -3.4215e-02,\n 5.5000e+01],\n [ 3.8076e-02, 5.0680e-02, -2.9918e-02, -4.0099e-02, -3.3216e-02,\n -2.4174e-02, -1.0266e-02, -2.5923e-03, -1.2908e-02, 3.0644e-03,\n 1.6000e+02],\n [ 3.4443e-02, 5.0680e-02, 1.2529e-01, 2.8758e-02, -5.3855e-02,\n -1.2900e-02, -1.0231e-01, 1.0811e-01, 2.7149e-04, 2.7917e-02,\n 3.4100e+02]]),\n tensor([[ 0.0163, -0.0446, 0.0175, -0.0229, 0.0603, 0.0444, 0.0302, -0.0026,\n 0.0372, -0.0011],\n [-0.0418, -0.0446, 0.0477, 0.0597, 0.1278, 0.1280, -0.0250, 0.1081,\n 0.0639, 0.0403]]),\n tensor([128., 258.]))\n\n\n\n# Training loop\n\ntrain_loss = []\nval_losses = []\ntest_losses = []\nfor i in range(2000):\n context_input, query, y_query = get_context_query(X_train_torch, y_train_scaled, 10, 2)\n y_pred = m(context_input, query)\n loss = criterion(y_pred, y_query)\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n train_loss.append(loss.item())\n if i % 100 == 0:\n print(f\"Epoch {i} - Train loss: {loss.item()}\")\n\n\nEpoch 0 - Train loss: 0.2789093554019928\nEpoch 100 - Train loss: 1.1788500547409058\nEpoch 200 - Train loss: 0.6084457039833069\nEpoch 300 - Train loss: 0.6604040265083313\nEpoch 400 - Train loss: 0.6397659778594971\nEpoch 500 - Train loss: 0.18253237009048462\nEpoch 600 - Train loss: 0.4401477575302124\nEpoch 700 - Train loss: 0.19387057423591614\nEpoch 800 - Train loss: 1.3166064023971558\nEpoch 900 - Train loss: 0.4808962345123291\nEpoch 1000 - Train loss: 0.2843365967273712\nEpoch 1100 - Train loss: 0.35177987813949585\nEpoch 1200 - Train loss: 1.1539983749389648\nEpoch 1300 - Train loss: 2.0983378887176514\nEpoch 1400 - Train loss: 3.325526237487793\nEpoch 1500 - Train loss: 0.38105207681655884\nEpoch 1600 - Train loss: 0.16427072882652283\nEpoch 1700 - Train loss: 1.9909170866012573\nEpoch 1800 - Train loss: 0.03792643919587135\nEpoch 1900 - Train loss: 2.758319854736328\n\n\n\nplt.plot(train_loss)\n\n\n\n\n\n\n\n\n\ncontext_input, _, _ = get_context_query(X_train_torch, y_train_torch, 10, query.shape[0])\ncontext_input.shape\n\ntorch.Size([10, 11])\n\n\n\nX_test_torch.shape\n\ntorch.Size([111, 10])\n\n\n\nquery.shape\n\ntorch.Size([2, 10])\n\n\n\ncontext_input.shape\n\ntorch.Size([10, 11])\n\n\n\n# Use whole of the test set as the query set\nquery = X_test_torch\ny_query = y_test_torch\n\n# get context from the training set\ncontext_input, _, _ = get_context_query(X_train_torch, y_train_scaled, 10, query.shape[0])\n\n\nm = Meta(C=10, Q=query.shape[0])\n\ny_pred = m(context_input, query)\n\n# Use inverse transform to get the original values\n\ny_pred = scaler.inverse_transform(y_pred.detach().numpy())\nprint(f\"Test loss: {np.sqrt(mean_squared_error(y_query, y_pred))}\")\n\nTest loss: 75.22261810302734\n\n\n\npd.DataFrame({\"y_pred\": y_pred.reshape(-1), \"y_query\": y_query})\n\n\n\n\n\n\n\n\ny_pred\ny_query\n\n\n\n\n0\n169.061569\n78.0\n\n\n1\n168.179596\n152.0\n\n\n2\n168.527161\n200.0\n\n\n3\n168.452423\n59.0\n\n\n4\n168.826492\n311.0\n\n\n...\n...\n...\n\n\n106\n169.100494\n281.0\n\n\n107\n168.457611\n281.0\n\n\n108\n169.019516\n214.0\n\n\n109\n168.639725\n96.0\n\n\n110\n169.026901\n146.0\n\n\n\n\n111 rows × 2 columns" }, { "objectID": "posts/2022-02-24-audio-filtering.html", @@ -641,7 +648,7 @@ "href": "posts/2024-attention.html", "title": "Attention", "section": "", - "text": "import numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format = 'retina'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom einops import rearrange, reduce, repeat\n\n\n!wget https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt -O names-indian.txt\n\n--2024-05-30 09:41:48-- https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...\nConnecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 8752 (8.5K) [text/plain]\nSaving to: ‘names-indian.txt’\n\nnames-indian.txt 100%[===================>] 8.55K --.-KB/s in 0s \n\n2024-05-30 09:41:49 (33.8 MB/s) - ‘names-indian.txt’ saved [8752/8752]\n\n\n\n\nimport pandas as pd\npd.read_csv('names-indian.txt', header=None)\n\n\n\n\n\n\n\n\n\n0\n\n\n\n\n0\nAbhishek\n\n\n1\nAman\n\n\n2\nHarsh\n\n\n3\nAyush\n\n\n4\nAditi\n\n\n...\n...\n\n\n1160\nPrasoon\n\n\n1161\nMadhusudan\n\n\n1162\nPrastuti\n\n\n1163\nRampratap\n\n\n1164\nMadhukar\n\n\n\n\n1165 rows × 1 columns\n\n\n\n\n\n# convert all names to lowercase\nnames = pd.read_csv('names-indian.txt', header=None)[0].str.lower().values\n\n\nnames\n\narray(['abhishek', 'aman', 'harsh', ..., 'prastuti', 'rampratap',\n 'madhukar'], dtype=object)\n\n\n\n# KDE plot of name lengths\nplt.figure(figsize=(8, 4))\nplt.hist([len(name) for name in names], bins=range(1, 20), density=True, alpha=0.7)\nplt.xlabel('Name length')\nplt.ylabel('Density')\n\nText(0, 0.5, 'Density')\n\n\n\n\n\n\n\n\n\n\n# Attach START and END tokens to each name. Need to add these two to the vocabulary.\nstart_symbol = '^'\nend_symbol = '$'\n\nnames = [start_symbol + name + end_symbol for name in names]\nnames[:5]\n\n['^abhishek$', '^aman$', '^harsh$', '^ayush$', '^aditi$']\n\n\n\n# Find unique characters in the dataset\nvocab = set(''.join(names))\nvocab = sorted(vocab)\nprint(vocab, len(vocab))\n\n['$', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 28\n\n\n\n# Create a d dimensional lookup table for each character in the vocabulary\nclass CharTable:\n def __init__(self, vocab):\n self.vocab = vocab\n self.char2index = {c: i for i, c in enumerate(vocab)}\n self.index2char = {i: c for i, c in enumerate(vocab)}\n self.vocab_size = len(vocab)\n \n def encode(self, name):\n return torch.tensor([self.char2index[c] for c in name])\n \n def decode(self, tensor):\n if type(tensor) == torch.Tensor:\n tensor = tensor.cpu().numpy()\n return ''.join([self.index2char[i] for i in tensor])\n\n\nct = CharTable(vocab)\n\nLet us process the first name in the dataset\n\n# create embedding layer\nclass CharEmbedding(nn.Module):\n def __init__(self, vocab_size, embed_size):\n super(CharEmbedding, self).__init__()\n self.embedding = nn.Embedding(vocab_size, embed_size)\n \n def forward(self, x):\n return self.embedding(x)\n\nembedding_dim = 8\nchar_embedding = CharEmbedding(ct.vocab_size, embedding_dim )\n\n\nname = names[0]\n\nencoding = ct.encode(name)\nprint(name, encoding, ct.decode(encoding), char_embedding(encoding))\n\n^abhishek$ tensor([ 1, 2, 3, 9, 10, 20, 9, 6, 12, 0]) ^abhishek$ tensor([[-1.3499, -0.8886, -0.6833, -2.4340, -0.3476, -0.2824, 0.3694, -1.2859],\n [ 0.6961, -0.3760, -1.1183, 2.2782, -1.3446, 0.2088, 0.4919, -0.1777],\n [-0.3584, 0.3688, 0.3429, 0.2168, 2.0347, -1.5288, -0.4697, -0.3612],\n [-1.5195, 2.0605, 0.2935, 0.0287, 0.2705, -1.4502, -0.6650, 0.3143],\n [-0.6630, 0.0302, 0.4576, 0.3651, -1.6005, -0.5861, -1.9137, -0.4006],\n [ 0.0552, 0.1175, 1.7384, 0.0691, 1.1881, 0.1506, -0.3215, 2.6402],\n [-1.5195, 2.0605, 0.2935, 0.0287, 0.2705, -1.4502, -0.6650, 0.3143],\n [-0.1919, 1.4137, 0.0158, -0.0030, -0.6109, 0.4661, -0.1131, 0.2733],\n [ 0.8686, 0.3222, -0.2661, 2.1850, -1.3195, -0.6661, 0.8780, 0.2122],\n [ 0.6729, 0.4587, -0.3165, 1.4831, 0.1030, -1.4689, 0.4894, 1.2956]],\n grad_fn=<EmbeddingBackward0>)\n\n\n\nprint(char_embedding(encoding).shape)\n\ntorch.Size([10, 8])\n\n\n\nxs=[]\nfor i in range(len(name)):\n xs.append(char_embedding(ct.encode(name[i])))\n\n\nlength_name = len(name)\n\n\nd = 4\nval_linear = nn.Linear(embedding_dim, embedding_dim)\n\nquery_linear = nn.Linear(embedding_dim, d)\nkey_linear = nn.Linear(embedding_dim, d)\n\n\nvs = []\nfor i in range(length_name):\n vs.append(val_linear(xs[i]))\n\n\nvs\n\n[tensor([[-0.5005, 1.1128, 0.8048, 0.3994, 0.8465, -1.2007, -0.3687, 0.2159]],\n grad_fn=<AddmmBackward0>),\n tensor([[-0.2514, -0.1905, -0.5204, 0.0249, -0.1457, 0.2114, 0.3625, 0.5944]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.2653, -0.4796, -0.9962, 0.3799, 0.1251, 0.3504, 0.2554, -0.4853]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.1296, -0.0862, -0.9042, -0.4130, -0.2025, -0.7218, 0.4927, -0.0048]],\n grad_fn=<AddmmBackward0>),\n tensor([[-1.1825, 0.4473, -0.7623, -0.5004, -0.6020, -0.9123, -0.4412, 0.3128]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.5342, -0.9138, -0.6400, -0.0377, -0.0354, 0.3041, -1.2578, 0.3234]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.1296, -0.0862, -0.9042, -0.4130, -0.2025, -0.7218, 0.4927, -0.0048]],\n grad_fn=<AddmmBackward0>),\n tensor([[-0.0751, -0.1357, -0.5498, -0.0227, 0.0025, -0.0453, -0.2706, -0.0690]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.1425, -0.2478, -0.5700, -0.0055, -0.2560, 0.2981, 0.7119, 0.5840]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.9592, -0.7415, -0.7288, -0.1082, 0.1099, -0.0595, 0.4140, 0.6418]],\n grad_fn=<AddmmBackward0>)]\n\n\n\nqs = []\nfor i in range(length_name):\n qs.append(query_linear(xs[i]))\n\nks = []\nfor i in range(length_name):\n ks.append(key_linear(xs[i]))\n \n\n\nqs\n\n[tensor([[-0.5431, -0.8826, -2.0655, 0.3620]], grad_fn=<AddmmBackward0>),\n tensor([[ 0.2952, -0.6107, -0.4607, 1.6180]], grad_fn=<AddmmBackward0>),\n tensor([[0.3232, 0.1415, 0.1938, 0.1639]], grad_fn=<AddmmBackward0>),\n tensor([[-0.0149, -0.6881, -0.9877, 0.9795]], grad_fn=<AddmmBackward0>),\n tensor([[ 1.0182, 0.6256, 1.4679, -0.5539]], grad_fn=<AddmmBackward0>),\n tensor([[-0.2207, -0.9287, -0.9676, 0.4366]], grad_fn=<AddmmBackward0>),\n tensor([[-0.0149, -0.6881, -0.9877, 0.9795]], grad_fn=<AddmmBackward0>),\n tensor([[-0.6503, -1.5170, -0.6902, 1.8153]], grad_fn=<AddmmBackward0>),\n tensor([[-0.2690, -0.6013, -0.9059, 0.4749]], grad_fn=<AddmmBackward0>),\n tensor([[ 0.2936, -0.5395, -0.8663, 0.6923]], grad_fn=<AddmmBackward0>)]\n\n\n\nks\n\n[tensor([[-0.0686, -0.6523, -0.3398, -0.2891]], grad_fn=<AddmmBackward0>),\n tensor([[-1.8098, -0.3927, -0.2086, 0.4891]], grad_fn=<AddmmBackward0>),\n tensor([[-0.5030, 0.1248, -0.1280, -0.0116]], grad_fn=<AddmmBackward0>),\n tensor([[-0.9497, -0.3944, -0.1638, 0.1935]], grad_fn=<AddmmBackward0>),\n tensor([[0.2378, 0.7928, 0.6968, 0.3017]], grad_fn=<AddmmBackward0>),\n tensor([[-0.0548, 0.0063, 0.2924, 0.2715]], grad_fn=<AddmmBackward0>),\n tensor([[-0.9497, -0.3944, -0.1638, 0.1935]], grad_fn=<AddmmBackward0>),\n tensor([[-1.5675, 0.1323, -0.1190, 0.7133]], grad_fn=<AddmmBackward0>),\n tensor([[-0.4218, -0.1489, -0.2049, -0.0142]], grad_fn=<AddmmBackward0>),\n tensor([[-0.5909, -0.3664, 0.1543, 0.2502]], grad_fn=<AddmmBackward0>)]\n\n\n\nattns = torch.zeros(length_name, length_name)\nfor i in range(length_name):\n for j in range(length_name):\n attns[i, j] = torch.matmul(qs[i], ks[j].T)\n\n\nattns\n\ntensor([[ 1.2102, 1.9374, 0.4234, 1.2723, -2.1590, -0.4814, 1.2723, 1.2385,\n 0.7785, 0.4162],\n [ 0.0670, 0.5930, -0.1845, 0.3490, -0.2468, 0.2845, 0.3490, 0.6654,\n 0.0378, 0.3831],\n [-0.2277, -0.6008, -0.1716, -0.3628, 0.3736, 0.0843, -0.3628, -0.3941,\n -0.1994, -0.1719],\n [ 0.5024, 0.9822, 0.0367, 0.6368, -0.9418, -0.0264, 0.6368, 0.7484,\n 0.2971, 0.3536],\n [-0.8166, -2.6654, -0.6156, -1.5613, 1.5939, 0.2270, -1.5613, -2.0829,\n -0.8154, -0.7429],\n [ 0.8236, 1.1794, 0.1140, 0.8188, -1.3313, -0.1581, 0.8188, 0.6496,\n 0.4234, 0.4306],\n [ 0.5024, 0.9822, 0.0367, 0.6368, -0.9418, -0.0264, 0.6368, 0.7484,\n 0.2971, 0.3536],\n [ 0.7440, 2.8044, 0.2052, 1.6802, -1.2906, 0.3171, 1.6802, 2.1955,\n 0.6157, 1.2878],\n [ 0.5812, 1.1442, 0.1708, 0.7329, -1.0286, -0.1250, 0.7329, 0.7886,\n 0.3818, 0.3583],\n [ 0.4260, 0.1997, -0.1121, 0.2098, -0.7526, -0.0848, 0.2098, 0.0653,\n 0.1241, 0.0637]], grad_fn=<CopySlices>)\n\n\n\n# applt softmax to get attention weights\nattns = F.softmax(attns, dim=-1)\nattns\n\ntensor([[0.1023, 0.1169, 0.0955, 0.1031, 0.0907, 0.0924, 0.1031, 0.1026, 0.0979,\n 0.0955],\n [0.0981, 0.1038, 0.0964, 0.1008, 0.0960, 0.1001, 0.1008, 0.1049, 0.0979,\n 0.1012],\n [0.0994, 0.0965, 0.0999, 0.0982, 0.1074, 0.1029, 0.0982, 0.0980, 0.0996,\n 0.0999],\n [0.1007, 0.1075, 0.0967, 0.1022, 0.0927, 0.0963, 0.1022, 0.1037, 0.0987,\n 0.0992],\n [0.0938, 0.0899, 0.0949, 0.0914, 0.1571, 0.1030, 0.0914, 0.0905, 0.0938,\n 0.0942],\n [0.1031, 0.1091, 0.0964, 0.1031, 0.0918, 0.0950, 0.1031, 0.1010, 0.0987,\n 0.0988],\n [0.1007, 0.1075, 0.0967, 0.1022, 0.0927, 0.0963, 0.1022, 0.1037, 0.0987,\n 0.0992],\n [0.0942, 0.1282, 0.0924, 0.1010, 0.0905, 0.0927, 0.1010, 0.1091, 0.0937,\n 0.0973],\n [0.1007, 0.1092, 0.0971, 0.1025, 0.0924, 0.0953, 0.1025, 0.1032, 0.0987,\n 0.0985],\n [0.1043, 0.1013, 0.0983, 0.1014, 0.0945, 0.0985, 0.1014, 0.0999, 0.1005,\n 0.0999]], grad_fn=<SoftmaxBackward0>)\n\n\n\nplt.imshow(attns.detach().numpy(), cmap='hot', interpolation='nearest')\n\n\n\n\n\n\n\n\n\ndf = pd.DataFrame(attns.detach().numpy())\ndf\n\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.102279\n0.116902\n0.095548\n0.103101\n0.090652\n0.092365\n0.103101\n0.102647\n0.097894\n0.095509\n\n\n1\n0.098137\n0.103838\n0.096373\n0.100781\n0.096004\n0.100102\n0.100781\n0.104920\n0.097907\n0.101158\n\n\n2\n0.099379\n0.096506\n0.099920\n0.098203\n0.107403\n0.102867\n0.098203\n0.097955\n0.099648\n0.099917\n\n\n3\n0.100661\n0.107542\n0.096716\n0.102229\n0.092731\n0.096318\n0.102229\n0.103722\n0.098675\n0.099177\n\n\n4\n0.093831\n0.089897\n0.094899\n0.091359\n0.157125\n0.103029\n0.091359\n0.090467\n0.093837\n0.094197\n\n\n5\n0.103120\n0.109096\n0.096438\n0.103056\n0.091776\n0.094960\n0.103056\n0.100971\n0.098732\n0.098796\n\n\n6\n0.100661\n0.107542\n0.096716\n0.102229\n0.092731\n0.096318\n0.102229\n0.103722\n0.098675\n0.099177\n\n\n7\n0.094160\n0.128211\n0.092408\n0.100973\n0.090542\n0.092696\n0.100973\n0.109109\n0.093650\n0.097276\n\n\n8\n0.100667\n0.109200\n0.097085\n0.102458\n0.092360\n0.095326\n0.102458\n0.103196\n0.098727\n0.098525\n\n\n9\n0.104278\n0.101318\n0.098286\n0.101434\n0.094502\n0.098512\n0.101434\n0.099883\n0.100485\n0.099867\n\n\n\n\n\n\n\n\n\ndf.sum(axis=1)\n\n0 1.0\n1 1.0\n2 1.0\n3 1.0\n4 1.0\n5 1.0\n6 1.0\n7 1.0\n8 1.0\n9 1.0\ndtype: float32\n\n\n\nupdated_embedding_1.shape\n\ntorch.Size([10, 10, 8])\n\n\n\nvs[0].shape\n\ntorch.Size([1, 8])" + "text": "import numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format = 'retina'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom einops import rearrange, reduce, repeat\n\n\n!wget https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt -O names-indian.txt\n\n--2024-05-30 09:41:48-- https://raw.githubusercontent.com/MASTREX/List-of-Indian-Names/master/2.%20First.txt\nResolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...\nConnecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 8752 (8.5K) [text/plain]\nSaving to: ‘names-indian.txt’\n\nnames-indian.txt 100%[===================>] 8.55K --.-KB/s in 0s \n\n2024-05-30 09:41:49 (33.8 MB/s) - ‘names-indian.txt’ saved [8752/8752]\n\n\n\n\nimport pandas as pd\npd.read_csv('names-indian.txt', header=None)\n\n\n\n\n\n\n\n\n0\n\n\n\n\n0\nAbhishek\n\n\n1\nAman\n\n\n2\nHarsh\n\n\n3\nAyush\n\n\n4\nAditi\n\n\n...\n...\n\n\n1160\nPrasoon\n\n\n1161\nMadhusudan\n\n\n1162\nPrastuti\n\n\n1163\nRampratap\n\n\n1164\nMadhukar\n\n\n\n\n1165 rows × 1 columns\n\n\n\n\n# convert all names to lowercase\nnames = pd.read_csv('names-indian.txt', header=None)[0].str.lower().values\n\n\nnames\n\narray(['abhishek', 'aman', 'harsh', ..., 'prastuti', 'rampratap',\n 'madhukar'], dtype=object)\n\n\n\n# KDE plot of name lengths\nplt.figure(figsize=(8, 4))\nplt.hist([len(name) for name in names], bins=range(1, 20), density=True, alpha=0.7)\nplt.xlabel('Name length')\nplt.ylabel('Density')\n\nText(0, 0.5, 'Density')\n\n\n\n\n\n\n\n\n\n\n# Attach START and END tokens to each name. Need to add these two to the vocabulary.\nstart_symbol = '^'\nend_symbol = '$'\n\nnames = [start_symbol + name + end_symbol for name in names]\nnames[:5]\n\n['^abhishek$', '^aman$', '^harsh$', '^ayush$', '^aditi$']\n\n\n\n# Find unique characters in the dataset\nvocab = set(''.join(names))\nvocab = sorted(vocab)\nprint(vocab, len(vocab))\n\n['$', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 28\n\n\n\n# Create a d dimensional lookup table for each character in the vocabulary\nclass CharTable:\n def __init__(self, vocab):\n self.vocab = vocab\n self.char2index = {c: i for i, c in enumerate(vocab)}\n self.index2char = {i: c for i, c in enumerate(vocab)}\n self.vocab_size = len(vocab)\n \n def encode(self, name):\n return torch.tensor([self.char2index[c] for c in name])\n \n def decode(self, tensor):\n if type(tensor) == torch.Tensor:\n tensor = tensor.cpu().numpy()\n return ''.join([self.index2char[i] for i in tensor])\n\n\nct = CharTable(vocab)\n\nLet us process the first name in the dataset\n\n# create embedding layer\nclass CharEmbedding(nn.Module):\n def __init__(self, vocab_size, embed_size):\n super(CharEmbedding, self).__init__()\n self.embedding = nn.Embedding(vocab_size, embed_size)\n \n def forward(self, x):\n return self.embedding(x)\n\nembedding_dim = 8\nchar_embedding = CharEmbedding(ct.vocab_size, embedding_dim )\n\n\nname = names[0]\n\nencoding = ct.encode(name)\nprint(name, encoding, ct.decode(encoding), char_embedding(encoding))\n\n^abhishek$ tensor([ 1, 2, 3, 9, 10, 20, 9, 6, 12, 0]) ^abhishek$ tensor([[-1.3499, -0.8886, -0.6833, -2.4340, -0.3476, -0.2824, 0.3694, -1.2859],\n [ 0.6961, -0.3760, -1.1183, 2.2782, -1.3446, 0.2088, 0.4919, -0.1777],\n [-0.3584, 0.3688, 0.3429, 0.2168, 2.0347, -1.5288, -0.4697, -0.3612],\n [-1.5195, 2.0605, 0.2935, 0.0287, 0.2705, -1.4502, -0.6650, 0.3143],\n [-0.6630, 0.0302, 0.4576, 0.3651, -1.6005, -0.5861, -1.9137, -0.4006],\n [ 0.0552, 0.1175, 1.7384, 0.0691, 1.1881, 0.1506, -0.3215, 2.6402],\n [-1.5195, 2.0605, 0.2935, 0.0287, 0.2705, -1.4502, -0.6650, 0.3143],\n [-0.1919, 1.4137, 0.0158, -0.0030, -0.6109, 0.4661, -0.1131, 0.2733],\n [ 0.8686, 0.3222, -0.2661, 2.1850, -1.3195, -0.6661, 0.8780, 0.2122],\n [ 0.6729, 0.4587, -0.3165, 1.4831, 0.1030, -1.4689, 0.4894, 1.2956]],\n grad_fn=<EmbeddingBackward0>)\n\n\n\nprint(char_embedding(encoding).shape)\n\ntorch.Size([10, 8])\n\n\n\nxs=[]\nfor i in range(len(name)):\n xs.append(char_embedding(ct.encode(name[i])))\n\n\nlength_name = len(name)\n\n\nd = 4\nval_linear = nn.Linear(embedding_dim, embedding_dim)\n\nquery_linear = nn.Linear(embedding_dim, d)\nkey_linear = nn.Linear(embedding_dim, d)\n\n\nvs = []\nfor i in range(length_name):\n vs.append(val_linear(xs[i]))\n\n\nvs\n\n[tensor([[-0.5005, 1.1128, 0.8048, 0.3994, 0.8465, -1.2007, -0.3687, 0.2159]],\n grad_fn=<AddmmBackward0>),\n tensor([[-0.2514, -0.1905, -0.5204, 0.0249, -0.1457, 0.2114, 0.3625, 0.5944]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.2653, -0.4796, -0.9962, 0.3799, 0.1251, 0.3504, 0.2554, -0.4853]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.1296, -0.0862, -0.9042, -0.4130, -0.2025, -0.7218, 0.4927, -0.0048]],\n grad_fn=<AddmmBackward0>),\n tensor([[-1.1825, 0.4473, -0.7623, -0.5004, -0.6020, -0.9123, -0.4412, 0.3128]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.5342, -0.9138, -0.6400, -0.0377, -0.0354, 0.3041, -1.2578, 0.3234]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.1296, -0.0862, -0.9042, -0.4130, -0.2025, -0.7218, 0.4927, -0.0048]],\n grad_fn=<AddmmBackward0>),\n tensor([[-0.0751, -0.1357, -0.5498, -0.0227, 0.0025, -0.0453, -0.2706, -0.0690]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.1425, -0.2478, -0.5700, -0.0055, -0.2560, 0.2981, 0.7119, 0.5840]],\n grad_fn=<AddmmBackward0>),\n tensor([[ 0.9592, -0.7415, -0.7288, -0.1082, 0.1099, -0.0595, 0.4140, 0.6418]],\n grad_fn=<AddmmBackward0>)]\n\n\n\nqs = []\nfor i in range(length_name):\n qs.append(query_linear(xs[i]))\n\nks = []\nfor i in range(length_name):\n ks.append(key_linear(xs[i]))\n \n\n\nqs\n\n[tensor([[-0.5431, -0.8826, -2.0655, 0.3620]], grad_fn=<AddmmBackward0>),\n tensor([[ 0.2952, -0.6107, -0.4607, 1.6180]], grad_fn=<AddmmBackward0>),\n tensor([[0.3232, 0.1415, 0.1938, 0.1639]], grad_fn=<AddmmBackward0>),\n tensor([[-0.0149, -0.6881, -0.9877, 0.9795]], grad_fn=<AddmmBackward0>),\n tensor([[ 1.0182, 0.6256, 1.4679, -0.5539]], grad_fn=<AddmmBackward0>),\n tensor([[-0.2207, -0.9287, -0.9676, 0.4366]], grad_fn=<AddmmBackward0>),\n tensor([[-0.0149, -0.6881, -0.9877, 0.9795]], grad_fn=<AddmmBackward0>),\n tensor([[-0.6503, -1.5170, -0.6902, 1.8153]], grad_fn=<AddmmBackward0>),\n tensor([[-0.2690, -0.6013, -0.9059, 0.4749]], grad_fn=<AddmmBackward0>),\n tensor([[ 0.2936, -0.5395, -0.8663, 0.6923]], grad_fn=<AddmmBackward0>)]\n\n\n\nks\n\n[tensor([[-0.0686, -0.6523, -0.3398, -0.2891]], grad_fn=<AddmmBackward0>),\n tensor([[-1.8098, -0.3927, -0.2086, 0.4891]], grad_fn=<AddmmBackward0>),\n tensor([[-0.5030, 0.1248, -0.1280, -0.0116]], grad_fn=<AddmmBackward0>),\n tensor([[-0.9497, -0.3944, -0.1638, 0.1935]], grad_fn=<AddmmBackward0>),\n tensor([[0.2378, 0.7928, 0.6968, 0.3017]], grad_fn=<AddmmBackward0>),\n tensor([[-0.0548, 0.0063, 0.2924, 0.2715]], grad_fn=<AddmmBackward0>),\n tensor([[-0.9497, -0.3944, -0.1638, 0.1935]], grad_fn=<AddmmBackward0>),\n tensor([[-1.5675, 0.1323, -0.1190, 0.7133]], grad_fn=<AddmmBackward0>),\n tensor([[-0.4218, -0.1489, -0.2049, -0.0142]], grad_fn=<AddmmBackward0>),\n tensor([[-0.5909, -0.3664, 0.1543, 0.2502]], grad_fn=<AddmmBackward0>)]\n\n\n\nattns = torch.zeros(length_name, length_name)\nfor i in range(length_name):\n for j in range(length_name):\n attns[i, j] = torch.matmul(qs[i], ks[j].T)\n\n\nattns\n\ntensor([[ 1.2102, 1.9374, 0.4234, 1.2723, -2.1590, -0.4814, 1.2723, 1.2385,\n 0.7785, 0.4162],\n [ 0.0670, 0.5930, -0.1845, 0.3490, -0.2468, 0.2845, 0.3490, 0.6654,\n 0.0378, 0.3831],\n [-0.2277, -0.6008, -0.1716, -0.3628, 0.3736, 0.0843, -0.3628, -0.3941,\n -0.1994, -0.1719],\n [ 0.5024, 0.9822, 0.0367, 0.6368, -0.9418, -0.0264, 0.6368, 0.7484,\n 0.2971, 0.3536],\n [-0.8166, -2.6654, -0.6156, -1.5613, 1.5939, 0.2270, -1.5613, -2.0829,\n -0.8154, -0.7429],\n [ 0.8236, 1.1794, 0.1140, 0.8188, -1.3313, -0.1581, 0.8188, 0.6496,\n 0.4234, 0.4306],\n [ 0.5024, 0.9822, 0.0367, 0.6368, -0.9418, -0.0264, 0.6368, 0.7484,\n 0.2971, 0.3536],\n [ 0.7440, 2.8044, 0.2052, 1.6802, -1.2906, 0.3171, 1.6802, 2.1955,\n 0.6157, 1.2878],\n [ 0.5812, 1.1442, 0.1708, 0.7329, -1.0286, -0.1250, 0.7329, 0.7886,\n 0.3818, 0.3583],\n [ 0.4260, 0.1997, -0.1121, 0.2098, -0.7526, -0.0848, 0.2098, 0.0653,\n 0.1241, 0.0637]], grad_fn=<CopySlices>)\n\n\n\n# applt softmax to get attention weights\nattns = F.softmax(attns, dim=-1)\nattns\n\ntensor([[0.1023, 0.1169, 0.0955, 0.1031, 0.0907, 0.0924, 0.1031, 0.1026, 0.0979,\n 0.0955],\n [0.0981, 0.1038, 0.0964, 0.1008, 0.0960, 0.1001, 0.1008, 0.1049, 0.0979,\n 0.1012],\n [0.0994, 0.0965, 0.0999, 0.0982, 0.1074, 0.1029, 0.0982, 0.0980, 0.0996,\n 0.0999],\n [0.1007, 0.1075, 0.0967, 0.1022, 0.0927, 0.0963, 0.1022, 0.1037, 0.0987,\n 0.0992],\n [0.0938, 0.0899, 0.0949, 0.0914, 0.1571, 0.1030, 0.0914, 0.0905, 0.0938,\n 0.0942],\n [0.1031, 0.1091, 0.0964, 0.1031, 0.0918, 0.0950, 0.1031, 0.1010, 0.0987,\n 0.0988],\n [0.1007, 0.1075, 0.0967, 0.1022, 0.0927, 0.0963, 0.1022, 0.1037, 0.0987,\n 0.0992],\n [0.0942, 0.1282, 0.0924, 0.1010, 0.0905, 0.0927, 0.1010, 0.1091, 0.0937,\n 0.0973],\n [0.1007, 0.1092, 0.0971, 0.1025, 0.0924, 0.0953, 0.1025, 0.1032, 0.0987,\n 0.0985],\n [0.1043, 0.1013, 0.0983, 0.1014, 0.0945, 0.0985, 0.1014, 0.0999, 0.1005,\n 0.0999]], grad_fn=<SoftmaxBackward0>)\n\n\n\nplt.imshow(attns.detach().numpy(), cmap='hot', interpolation='nearest')\n\n\n\n\n\n\n\n\n\ndf = pd.DataFrame(attns.detach().numpy())\ndf\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0.102279\n0.116902\n0.095548\n0.103101\n0.090652\n0.092365\n0.103101\n0.102647\n0.097894\n0.095509\n\n\n1\n0.098137\n0.103838\n0.096373\n0.100781\n0.096004\n0.100102\n0.100781\n0.104920\n0.097907\n0.101158\n\n\n2\n0.099379\n0.096506\n0.099920\n0.098203\n0.107403\n0.102867\n0.098203\n0.097955\n0.099648\n0.099917\n\n\n3\n0.100661\n0.107542\n0.096716\n0.102229\n0.092731\n0.096318\n0.102229\n0.103722\n0.098675\n0.099177\n\n\n4\n0.093831\n0.089897\n0.094899\n0.091359\n0.157125\n0.103029\n0.091359\n0.090467\n0.093837\n0.094197\n\n\n5\n0.103120\n0.109096\n0.096438\n0.103056\n0.091776\n0.094960\n0.103056\n0.100971\n0.098732\n0.098796\n\n\n6\n0.100661\n0.107542\n0.096716\n0.102229\n0.092731\n0.096318\n0.102229\n0.103722\n0.098675\n0.099177\n\n\n7\n0.094160\n0.128211\n0.092408\n0.100973\n0.090542\n0.092696\n0.100973\n0.109109\n0.093650\n0.097276\n\n\n8\n0.100667\n0.109200\n0.097085\n0.102458\n0.092360\n0.095326\n0.102458\n0.103196\n0.098727\n0.098525\n\n\n9\n0.104278\n0.101318\n0.098286\n0.101434\n0.094502\n0.098512\n0.101434\n0.099883\n0.100485\n0.099867\n\n\n\n\n\n\n\n\ndf.sum(axis=1)\n\n0 1.0\n1 1.0\n2 1.0\n3 1.0\n4 1.0\n5 1.0\n6 1.0\n7 1.0\n8 1.0\n9 1.0\ndtype: float32\n\n\n\nupdated_embedding_1.shape\n\ntorch.Size([10, 10, 8])\n\n\n\nvs[0].shape\n\ntorch.Size([1, 8])" }, { "objectID": "posts/2018-01-13-denoising.html", @@ -662,7 +669,7 @@ "href": "posts/2020-03-29-param-learning.html", "title": "Learning Gaussian Process regression parameters using gradient descent", "section": "", - "text": "In previous posts, I have talked about GP regression:\n\nPost 1 on programatically understanding GPs\nPost 2 on making use of a popular GP library called GPy\n\nIn this post, I will be talking about how to learn the parameters of a GP. I’ll keep this post simple and specific to a trivial example using RBF kernel (though the methods discussed are general.)\nTo keep things simple, we will assume a mean prior of zero and we will only be learning the parameters of the kernel function.\n\nKey Idea\n\nWrite the expression of log likelihood of data in terms of kernel parameters\nUse gradient descent to optimize the objective (negative log likelihood) and update the kernel parameters\n\n\n\nDefining log-likelihood\nIn our previous post we had mentioned (for the noiseless case):\nGiven train data \\[\nD=\\left(x_{i}, y_{i}\\right), i=1: N\n\\] Given a test set \\(X_{*}\\) of size \\(N_{*} \\times d\\) containing \\(N_{*}\\) points in \\(\\mathbb{R}^{d},\\) we want to predict function outputs \\(y_{*}\\) We can write: \\[\n\\left(\\begin{array}{l}\ny \\\\\ny_{*}\n\\end{array}\\right) \\sim \\mathcal{N}\\left(\\left(\\begin{array}{l}\n\\mu \\\\\n\\mu_{*}\n\\end{array}\\right),\\left(\\begin{array}{cc}\nK & K_{*} \\\\\nK_{*}^{T} & K_{* *}\n\\end{array}\\right)\\right)\n\\] where \\[\n\\begin{aligned}\nK &=\\operatorname{Ker}(X, X) \\in \\mathbb{R}^{N \\times N} \\\\\nK_{*} &=\\operatorname{Ker}\\left(X, X_{*}\\right) \\in \\mathbb{R}^{N \\times N} \\\\\nK_{* *} &=\\operatorname{Ker}\\left(X_{*}, X_{*}\\right) \\in \\mathbb{R}^{N_{*} \\times N_{*}}\n\\end{aligned}\n\\]\nThus, from the property of conditioning of multivariate Gaussian, we know that:\n\\[y \\sim \\mathcal{N}_N(\\mu, K)\\]\nWe will assume \\(\\mu\\) to be zero. Thus, we have for the train data, the following expression:\n\\[y \\sim \\mathcal{N}_N(0, K)\\]\nFor the noisy case, we have:\n\\[y \\sim \\mathcal{N}_N(0, K + \\sigma_{noise}^2\\mathcal{I}_N)\\]\nFrom this expression, we can write the log-likelihood of data computed over the kernel parameters \\(\\theta\\) as:\n\\[\\mathcal{LL}(\\theta) = \\log(\\frac{\\exp((-1/2)(y-0)^T (K+\\sigma_{noise}^2\\mathcal{I}_N)^{-1}(y-0))}{(2\\pi)^{N/2}|(K+\\sigma_{noise}^2\\mathcal{I}_N)|^{1/2}})\\]\nThus, we can write:\n\\[\\mathcal{LL}(\\theta) =\\log P(\\mathbf{y} | X, \\theta)=-\\frac{1}{2} \\mathbf{y}^{\\top} M^{-1} \\mathbf{y}-\\frac{1}{2} \\log |M|-\\frac{N}{2} \\log 2 \\pi\\]\nwhere \\[M = K + \\sigma_{noise}^2\\mathcal{I}_N\\]\n\n\nImports\nAs before, we will be using the excellent Autograd library for automatically computing the gradient of an objective function with respect to the parameters. We will also be using GPy for verifying our calculations.\nLet us start with some basic imports.\n\nimport autograd.numpy as np\n\nfrom matplotlib import pyplot as plt\n%matplotlib inline\n\nimport warnings\nwarnings.filterwarnings('ignore')\n\nimport GPy\n\n\n\nDefining our RBF kernel\nThe definition of the (1-dimensional) RBF kernel has a Gaussian-form, defined as:\n\\[\n \\kappa_\\mathrm{rbf}(x_1,x_2) = \\sigma^2\\exp\\left(-\\frac{(x_1-x_2)^2}{2\\mathscr{l}^2}\\right)\n\\]\n\ndef rbf(x1, x2, sigma, l):\n return (sigma**2)*(np.exp(-(x1-x2)**2/(2*(l**2)))) \n\n\n\nDefining GPy’s RBF kernel\n\n# Create a 1-D RBF kernel with default parameters\nk = GPy.kern.RBF(1)\n# Preview the kernel's parameters\nk\n\n\n\n\n\n\n\nrbf.\nvalue\nconstraints\npriors\n\n\nvariance\n1.0\n+ve\n\n\n\nlengthscale\n1.0\n+ve\n\n\n\n\n\n\n\n\n\nMatching our RBF kernel with GPy’s kernel\n\nrbf(1, 0, 1, 1)==k.K(np.array([[1]]), np.array([[0]])).flatten()\n\narray([ True])\n\n\nLooks good. Our function is matching GPy’s kernel.\n\n\nGP Regresion\n\nCreating a data set\n\n# lambda function, call f(x) to generate data\nf = lambda x: 0.4*x**2 - 0.15*x**3 + 0.5*x**2 - 0.002*x**5 + 0.0002*x**6 +0.5*(x-2)**2\nn = 20\nnp.random.seed(0)\nX = np.linspace(0.05, 4.95, n)[:,None]\nY = f(X) + np.random.normal(0., 0.1, (n,1)) # note that np.random.normal takes mean and s.d. (not variance), 0.1^2 = 0.01\nplt.plot(X, Y, \"kx\", mew=2, label='Train points')\nplt.xlabel(\"x\"), plt.ylabel(\"f\")\nplt.legend();\n\n\n\n\n\n\n\n\n\n\n\nFunction to compute negative log likelihood\nBased on our above mentioned theory, we can now write the NLL function as follows\n\ndef nll(sigma=1, l=1, noise_std=1):\n n = X.shape[0]\n cov = rbf(X, X.T, sigma, l) + (noise_std**2)*np.eye(X.shape[0])\n nll_ar = 0.5*(Y.T@np.linalg.inv(cov)@Y) + 0.5*n*np.log(2*np.pi) + 0.5*np.log(np.linalg.det(cov)) \n return nll_ar[0,0]\n\n\n\nComparing the NLL from our method with the NLL from GPy\nWe will now compare the NLL from our method with GPy for a fixed set of parameters\n\nnll(1, 1, 1)\n\n40.103960984801276\n\n\n\nk.lengthscale = 1\nk.variance = 1\nm = GPy.models.GPRegression(X, Y, k, normalizer=False)\nm.Gaussian_noise = 1\nprint(m)\n\n\nName : GP regression\nObjective : 40.103961039553916\nNumber of Parameters : 3\nNumber of Optimization Parameters : 3\nUpdates : True\nParameters:\n GP_regression. | value | constraints | priors\n rbf.variance | 1.0 | +ve | \n rbf.lengthscale | 1.0 | +ve | \n Gaussian_noise.variance | 1.0 | +ve | \n\n\nExcellent, we can see that our method gives the same NLL. Looks like we are on the right track! One caveat here is that I have set the normalizer to be False, which means that GPy will not be mean centering the data.\n\n\nOptimizing the GP using GPy\nWe will now use GPy to optimize the GP parameters\n\nm = GPy.models.GPRegression(X, Y, k, normalizer=False)\nm.optimize()\nprint(m)\n\n\nName : GP regression\nObjective : -2.9419881541130053\nNumber of Parameters : 3\nNumber of Optimization Parameters : 3\nUpdates : True\nParameters:\n GP_regression. | value | constraints | priors\n rbf.variance | 27.837243180547883 | +ve | \n rbf.lengthscale | 2.732180018958835 | +ve | \n Gaussian_noise.variance | 0.007573211752763481 | +ve | \n\n\nIt seems that variance close to 28 and length scale close to 2.7 give the optimum objective for the GP\n\n\nPlotting the NLL as a function of variance and lenghtscale\nWe will now plot the NLL obtained from our calculations as a function of variance and lengthscale. For comparing our solution with GPy solution, I will be setting noise variance to be 0.0075\n\nimport numpy as numpy\nx_grid_2, y_grid_2 = numpy.mgrid[0.1:6:0.04, 0.1:4:0.03]\n\nli = np.zeros_like(x_grid_2)\nfor i in range(x_grid_2.shape[0]):\n for j in range(x_grid_2.shape[1]):\n li[i, j] = nll(x_grid_2[i, j], y_grid_2[i, j], np.sqrt(.007573211752763481))\n\n\nplt.contourf(x_grid_2, y_grid_2, li)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\sigma$\")\nplt.ylabel(r\"$l$\")\nplt.colorbar()\nplt.title(r\"NLL ($\\sigma, l$)\")\n\nText(0.5, 1.0, 'NLL ($\\\\sigma, l$)')\n\n\n\n\n\n\n\n\n\nWe will now try to find the “optimum” \\(\\sigma\\) and lengthscale from this NLL space.\n\nprint(li.min())\naa, bb = np.unravel_index(li.argmin(), li.shape)\nprint(x_grid_2[aa, 0]**2, y_grid_2[bb, 0])\n\n-2.9418973674348727\n28.09 0.1\n\n\nExcellent, it looks like we are pretty close to the optimum NLL as reported by GPy and our parameters learnt are also pretty similar. But, we have not even done a thorough search. We will now be using gradient descent to help us find the optimum set of parameters.\n\n\nGradient descent using autograd\n\nfrom autograd import elementwise_grad as egrad\nfrom autograd import grad\n\n\ngrad_objective = grad(nll, argnum=[0, 1, 2])\n\n\nVisualising the objective as a function of iteration\n\nsigma = 2.\nl = 2.\nnoise = 1.\nlr = 1e-3\nnum_iter = 100\nnll_arr = np.zeros(num_iter)\nfor iteration in range(num_iter):\n nll_arr[iteration] = nll(sigma, l, noise)\n del_sigma, del_l, del_noise = grad_objective(sigma, l, noise)\n sigma = sigma - lr*del_sigma\n l = l - lr*del_l\n noise = noise - lr*del_noise\n\n\nprint(sigma**2, l, noise)\n\n5.108812267877177 1.9770216805277476 0.11095385387537618\n\n\n\nplt.plot(nll_arr)\nplt.xlabel(\"Iteration\")\nplt.ylabel(\"NLL\")\n\nText(0, 0.5, 'NLL')\n\n\n\n\n\n\n\n\n\n\n\nApplying gradient descent and visualising the learnt function\n\nsigma = 2.\nl = 2.\nnoise = 1.\nlr = 1e-3\nnum_iter = 100\nnll_arr = np.zeros(num_iter)\nfig, ax = plt.subplots()\nfor iteration in range(num_iter):\n nll_arr[iteration] = nll(sigma, l, noise)\n del_sigma, del_l, del_noise = grad_objective(sigma, l, noise)\n sigma = sigma - lr*del_sigma\n l = l - lr*del_l\n noise = noise - lr*del_noise\n k.lengthscale = l\n k.variance = sigma**2\n m = GPy.models.GPRegression(X, Y, k, normalizer=False)\n m.Gaussian_noise = noise**2\n m.plot(ax=ax)['dataplot'];\n plt.ylim((0, 6))\n plt.title(f\"Iteration: {iteration:04}, Objective :{nll_arr[iteration]}\")\n plt.savefig(f\"/home/nipunbatra-pc/Desktop/gp_learning/{iteration:04}.png\")\n plt.cla();\nplt.clf()\n\n<Figure size 432x288 with 0 Axes>\n\n\n\n!convert -delay 20 -loop 0 /home/nipunbatra-pc/Desktop/gp_learning/*.png gp-learning.gif\n\n\nExcellent, we can see the “learning” process over time. Our final objective is comparable to GPy’s objective.\nThere are a few things I have mentioned, yet have not gone into their details and I would encourage you to try those out.\n\nFirst, you should try the gradient descent procedure with restarts. Run with different random initialisations and finally report the parameters which give the optimum likelihood.\nWe assume mean zero prior here. However, we are not processing the data and thus the zero mean assumption is not very well suited to our data. If you reduce the number of data points, you would quickly see the GP prediction to fall close to zero.\n\nThere you go. Till next time!" + "text": "In previous posts, I have talked about GP regression:\n\nPost 1 on programatically understanding GPs\nPost 2 on making use of a popular GP library called GPy\n\nIn this post, I will be talking about how to learn the parameters of a GP. I’ll keep this post simple and specific to a trivial example using RBF kernel (though the methods discussed are general.)\nTo keep things simple, we will assume a mean prior of zero and we will only be learning the parameters of the kernel function.\n\nKey Idea\n\nWrite the expression of log likelihood of data in terms of kernel parameters\nUse gradient descent to optimize the objective (negative log likelihood) and update the kernel parameters\n\n\n\nDefining log-likelihood\nIn our previous post we had mentioned (for the noiseless case):\nGiven train data \\[\nD=\\left(x_{i}, y_{i}\\right), i=1: N\n\\] Given a test set \\(X_{*}\\) of size \\(N_{*} \\times d\\) containing \\(N_{*}\\) points in \\(\\mathbb{R}^{d},\\) we want to predict function outputs \\(y_{*}\\) We can write: \\[\n\\left(\\begin{array}{l}\ny \\\\\ny_{*}\n\\end{array}\\right) \\sim \\mathcal{N}\\left(\\left(\\begin{array}{l}\n\\mu \\\\\n\\mu_{*}\n\\end{array}\\right),\\left(\\begin{array}{cc}\nK & K_{*} \\\\\nK_{*}^{T} & K_{* *}\n\\end{array}\\right)\\right)\n\\] where \\[\n\\begin{aligned}\nK &=\\operatorname{Ker}(X, X) \\in \\mathbb{R}^{N \\times N} \\\\\nK_{*} &=\\operatorname{Ker}\\left(X, X_{*}\\right) \\in \\mathbb{R}^{N \\times N} \\\\\nK_{* *} &=\\operatorname{Ker}\\left(X_{*}, X_{*}\\right) \\in \\mathbb{R}^{N_{*} \\times N_{*}}\n\\end{aligned}\n\\]\nThus, from the property of conditioning of multivariate Gaussian, we know that:\n\\[y \\sim \\mathcal{N}_N(\\mu, K)\\]\nWe will assume \\(\\mu\\) to be zero. Thus, we have for the train data, the following expression:\n\\[y \\sim \\mathcal{N}_N(0, K)\\]\nFor the noisy case, we have:\n\\[y \\sim \\mathcal{N}_N(0, K + \\sigma_{noise}^2\\mathcal{I}_N)\\]\nFrom this expression, we can write the log-likelihood of data computed over the kernel parameters \\(\\theta\\) as:\n\\[\\mathcal{LL}(\\theta) = \\log(\\frac{\\exp((-1/2)(y-0)^T (K+\\sigma_{noise}^2\\mathcal{I}_N)^{-1}(y-0))}{(2\\pi)^{N/2}|(K+\\sigma_{noise}^2\\mathcal{I}_N)|^{1/2}})\\]\nThus, we can write:\n\\[\\mathcal{LL}(\\theta) =\\log P(\\mathbf{y} | X, \\theta)=-\\frac{1}{2} \\mathbf{y}^{\\top} M^{-1} \\mathbf{y}-\\frac{1}{2} \\log |M|-\\frac{N}{2} \\log 2 \\pi\\]\nwhere \\[M = K + \\sigma_{noise}^2\\mathcal{I}_N\\]\n\n\nImports\nAs before, we will be using the excellent Autograd library for automatically computing the gradient of an objective function with respect to the parameters. We will also be using GPy for verifying our calculations.\nLet us start with some basic imports.\n\nimport autograd.numpy as np\n\nfrom matplotlib import pyplot as plt\n%matplotlib inline\n\nimport warnings\nwarnings.filterwarnings('ignore')\n\nimport GPy\n\n\n\nDefining our RBF kernel\nThe definition of the (1-dimensional) RBF kernel has a Gaussian-form, defined as:\n\\[\n \\kappa_\\mathrm{rbf}(x_1,x_2) = \\sigma^2\\exp\\left(-\\frac{(x_1-x_2)^2}{2\\mathscr{l}^2}\\right)\n\\]\n\ndef rbf(x1, x2, sigma, l):\n return (sigma**2)*(np.exp(-(x1-x2)**2/(2*(l**2)))) \n\n\n\nDefining GPy’s RBF kernel\n\n# Create a 1-D RBF kernel with default parameters\nk = GPy.kern.RBF(1)\n# Preview the kernel's parameters\nk\n\n\n\n\n\n\nrbf.\nvalue\nconstraints\npriors\n\n\nvariance\n1.0\n+ve\n\n\n\nlengthscale\n1.0\n+ve\n\n\n\n\n\n\n\n\nMatching our RBF kernel with GPy’s kernel\n\nrbf(1, 0, 1, 1)==k.K(np.array([[1]]), np.array([[0]])).flatten()\n\narray([ True])\n\n\nLooks good. Our function is matching GPy’s kernel.\n\n\nGP Regresion\n\nCreating a data set\n\n# lambda function, call f(x) to generate data\nf = lambda x: 0.4*x**2 - 0.15*x**3 + 0.5*x**2 - 0.002*x**5 + 0.0002*x**6 +0.5*(x-2)**2\nn = 20\nnp.random.seed(0)\nX = np.linspace(0.05, 4.95, n)[:,None]\nY = f(X) + np.random.normal(0., 0.1, (n,1)) # note that np.random.normal takes mean and s.d. (not variance), 0.1^2 = 0.01\nplt.plot(X, Y, \"kx\", mew=2, label='Train points')\nplt.xlabel(\"x\"), plt.ylabel(\"f\")\nplt.legend();\n\n\n\n\n\n\n\n\n\n\n\nFunction to compute negative log likelihood\nBased on our above mentioned theory, we can now write the NLL function as follows\n\ndef nll(sigma=1, l=1, noise_std=1):\n n = X.shape[0]\n cov = rbf(X, X.T, sigma, l) + (noise_std**2)*np.eye(X.shape[0])\n nll_ar = 0.5*(Y.T@np.linalg.inv(cov)@Y) + 0.5*n*np.log(2*np.pi) + 0.5*np.log(np.linalg.det(cov)) \n return nll_ar[0,0]\n\n\n\nComparing the NLL from our method with the NLL from GPy\nWe will now compare the NLL from our method with GPy for a fixed set of parameters\n\nnll(1, 1, 1)\n\n40.103960984801276\n\n\n\nk.lengthscale = 1\nk.variance = 1\nm = GPy.models.GPRegression(X, Y, k, normalizer=False)\nm.Gaussian_noise = 1\nprint(m)\n\n\nName : GP regression\nObjective : 40.103961039553916\nNumber of Parameters : 3\nNumber of Optimization Parameters : 3\nUpdates : True\nParameters:\n GP_regression. | value | constraints | priors\n rbf.variance | 1.0 | +ve | \n rbf.lengthscale | 1.0 | +ve | \n Gaussian_noise.variance | 1.0 | +ve | \n\n\nExcellent, we can see that our method gives the same NLL. Looks like we are on the right track! One caveat here is that I have set the normalizer to be False, which means that GPy will not be mean centering the data.\n\n\nOptimizing the GP using GPy\nWe will now use GPy to optimize the GP parameters\n\nm = GPy.models.GPRegression(X, Y, k, normalizer=False)\nm.optimize()\nprint(m)\n\n\nName : GP regression\nObjective : -2.9419881541130053\nNumber of Parameters : 3\nNumber of Optimization Parameters : 3\nUpdates : True\nParameters:\n GP_regression. | value | constraints | priors\n rbf.variance | 27.837243180547883 | +ve | \n rbf.lengthscale | 2.732180018958835 | +ve | \n Gaussian_noise.variance | 0.007573211752763481 | +ve | \n\n\nIt seems that variance close to 28 and length scale close to 2.7 give the optimum objective for the GP\n\n\nPlotting the NLL as a function of variance and lenghtscale\nWe will now plot the NLL obtained from our calculations as a function of variance and lengthscale. For comparing our solution with GPy solution, I will be setting noise variance to be 0.0075\n\nimport numpy as numpy\nx_grid_2, y_grid_2 = numpy.mgrid[0.1:6:0.04, 0.1:4:0.03]\n\nli = np.zeros_like(x_grid_2)\nfor i in range(x_grid_2.shape[0]):\n for j in range(x_grid_2.shape[1]):\n li[i, j] = nll(x_grid_2[i, j], y_grid_2[i, j], np.sqrt(.007573211752763481))\n\n\nplt.contourf(x_grid_2, y_grid_2, li)\nplt.gca().set_aspect('equal')\nplt.xlabel(r\"$\\sigma$\")\nplt.ylabel(r\"$l$\")\nplt.colorbar()\nplt.title(r\"NLL ($\\sigma, l$)\")\n\nText(0.5, 1.0, 'NLL ($\\\\sigma, l$)')\n\n\n\n\n\n\n\n\n\nWe will now try to find the “optimum” \\(\\sigma\\) and lengthscale from this NLL space.\n\nprint(li.min())\naa, bb = np.unravel_index(li.argmin(), li.shape)\nprint(x_grid_2[aa, 0]**2, y_grid_2[bb, 0])\n\n-2.9418973674348727\n28.09 0.1\n\n\nExcellent, it looks like we are pretty close to the optimum NLL as reported by GPy and our parameters learnt are also pretty similar. But, we have not even done a thorough search. We will now be using gradient descent to help us find the optimum set of parameters.\n\n\nGradient descent using autograd\n\nfrom autograd import elementwise_grad as egrad\nfrom autograd import grad\n\n\ngrad_objective = grad(nll, argnum=[0, 1, 2])\n\n\nVisualising the objective as a function of iteration\n\nsigma = 2.\nl = 2.\nnoise = 1.\nlr = 1e-3\nnum_iter = 100\nnll_arr = np.zeros(num_iter)\nfor iteration in range(num_iter):\n nll_arr[iteration] = nll(sigma, l, noise)\n del_sigma, del_l, del_noise = grad_objective(sigma, l, noise)\n sigma = sigma - lr*del_sigma\n l = l - lr*del_l\n noise = noise - lr*del_noise\n\n\nprint(sigma**2, l, noise)\n\n5.108812267877177 1.9770216805277476 0.11095385387537618\n\n\n\nplt.plot(nll_arr)\nplt.xlabel(\"Iteration\")\nplt.ylabel(\"NLL\")\n\nText(0, 0.5, 'NLL')\n\n\n\n\n\n\n\n\n\n\n\nApplying gradient descent and visualising the learnt function\n\nsigma = 2.\nl = 2.\nnoise = 1.\nlr = 1e-3\nnum_iter = 100\nnll_arr = np.zeros(num_iter)\nfig, ax = plt.subplots()\nfor iteration in range(num_iter):\n nll_arr[iteration] = nll(sigma, l, noise)\n del_sigma, del_l, del_noise = grad_objective(sigma, l, noise)\n sigma = sigma - lr*del_sigma\n l = l - lr*del_l\n noise = noise - lr*del_noise\n k.lengthscale = l\n k.variance = sigma**2\n m = GPy.models.GPRegression(X, Y, k, normalizer=False)\n m.Gaussian_noise = noise**2\n m.plot(ax=ax)['dataplot'];\n plt.ylim((0, 6))\n plt.title(f\"Iteration: {iteration:04}, Objective :{nll_arr[iteration]}\")\n plt.savefig(f\"/home/nipunbatra-pc/Desktop/gp_learning/{iteration:04}.png\")\n plt.cla();\nplt.clf()\n\n<Figure size 432x288 with 0 Axes>\n\n\n\n!convert -delay 20 -loop 0 /home/nipunbatra-pc/Desktop/gp_learning/*.png gp-learning.gif\n\n\nExcellent, we can see the “learning” process over time. Our final objective is comparable to GPy’s objective.\nThere are a few things I have mentioned, yet have not gone into their details and I would encourage you to try those out.\n\nFirst, you should try the gradient descent procedure with restarts. Run with different random initialisations and finally report the parameters which give the optimum likelihood.\nWe assume mean zero prior here. However, we are not processing the data and thus the zero mean assumption is not very well suited to our data. If you reduce the number of data points, you would quickly see the GP prediction to fall close to zero.\n\nThere you go. Till next time!" }, { "objectID": "posts/2022-02-11-matrix.html", @@ -683,7 +690,7 @@ "href": "posts/2017-08-02-fifty-ggplot-python-1.html", "title": "Top 50 ggplot2 Visualizations in Python - Part 1", "section": "", - "text": "A while back, I read this wonderful article called “Top 50 ggplot2 Visualizations - The Master List (With Full R Code)”. Many of the plots looked very useful. In this post, I’ll look at creating the first of the plot in Python (with the help of Stack Overflow).\nHere’s how the end result should look like.\n\nHow the final plot should look like\n\n\n\nAttributes of above plot\n\nX-Y scatter for area vs population\nColor by state\nMarker-size by population\n\nI’ll first use Pandas to create the plot. Pandas plotting capabilites are almost the first thing I use to create plots. Next, I’ll show how to use Seaborn to reduce some complexity. Lastly, I’ll use Altair, ggplot and Plotnine to show how it focuses on getting directly to the point, i.e. expressing the 3 required attributes!\n\n\nTLDR: Declarative visualisatio) is super useful!\n\n\nOriginal R code\n# install.packages(\"ggplot2\")\n# load package and data\noptions(scipen=999) # turn-off scientific notation like 1e+48\nlibrary(ggplot2)\ntheme_set(theme_bw()) # pre-set the bw theme.\ndata(\"midwest\", package = \"ggplot2\")\n# midwest <- read.csv(\"http://goo.gl/G1K41K\") # bkup data source\n\n# Scatterplot\ngg <- ggplot(midwest, aes(x=area, y=poptotal)) + \n geom_point(aes(col=state, size=popdensity)) + \n geom_smooth(method=\"loess\", se=F) + \n xlim(c(0, 0.1)) + \n ylim(c(0, 500000)) + \n labs(subtitle=\"Area Vs Population\", \n y=\"Population\", \n x=\"Area\", \n title=\"Scatterplot\", \n caption = \"Source: midwest\")\n\nplot(gg)\n\n%matplotlib inline\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport pandas as pd\n\n\n\nColor scheme (borrowed from Randy Olson’s website)\n\n# Tableau 20 Colors\ntableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), \n (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), \n (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), \n (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), \n (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]\n \n\n# Rescale to values between 0 and 1 \nfor i in range(len(tableau20)): \n r, g, b = tableau20[i] \n tableau20[i] = (r / 255., g / 255., b / 255.)\n\n\n\nGetting the data\n\nmidwest= pd.read_csv(\"http://goo.gl/G1K41K\") \n# Filtering\nmidwest= midwest[midwest.poptotal<50000]\n\n\nmidwest.head().loc[:, ['area'] ]\n\n\n\n\n\n\n\n\narea\n\n\n\n\n1\n0.014\n\n\n2\n0.022\n\n\n3\n0.017\n\n\n4\n0.018\n\n\n5\n0.050\n\n\n\n\n\n\n\n\n\n\nDefault Pandas scatter plot with marker size by population density\n\nmidwest.plot(kind='scatter', x='area', y='poptotal', ylim=((0, 50000)), xlim=((0., 0.1)), s=midwest['popdensity']*0.1)\n\n\n\n\n\n\n\n\nIf we just use the default Pandas scatter, we won’t get the colour by state. For that we wil group the dataframe by states and then scatter plot each group individually.\n\n\nComplete Pandas’ solution (hand-wavy at times!)\n\nfig, ax = plt.subplots()\ngroups = midwest.groupby('state')\ncolors = tableau20[::2]\n\n# Plotting each group \nfor i, (name, group) in enumerate(groups):\n group.plot(kind='scatter', x='area', y='poptotal', ylim=((0, 50000)), xlim=((0., 0.1)),\n s=10+group['popdensity']*0.1, # hand-wavy :(\n label=name, ax=ax, color=colors[i])\n\n# Legend for State colours\nlgd = ax.legend(numpoints=1, loc=1, borderpad=1, \n frameon=True, framealpha=0.9, title=\"state\")\nfor handle in lgd.legendHandles:\n handle.set_sizes([100.0])\n\n# Make a legend for popdensity. Hand-wavy. Error prone!\npws = (pd.cut(midwest['popdensity'], bins=4, retbins=True)[1]).round(0)\nfor pw in pws:\n plt.scatter([], [], s=(pw**2)/2e4, c=\"k\",label=str(pw))\n\nh, l = plt.gca().get_legend_handles_labels()\nplt.legend(h[5:], l[5:], labelspacing=1.2, title=\"popdensity\", borderpad=1, \n frameon=True, framealpha=0.9, loc=4, numpoints=1)\n\nplt.gca().add_artist(lgd)\n\n\n\n\n\n\n\n\n\n\nUsing Seaborn\nThe solution using Seaborn is slightly less complicated as we won’t need to write the code for plotting different states on different colours. However, the legend jugglery for markersize would still be required!\n\nsizes = [10, 40, 70, 100] \nmarker_size = pd.cut(midwest['popdensity'], range(0, 2500, 500), labels=sizes) \nsns.lmplot('area', 'poptotal', data=midwest, hue='state', fit_reg=False, scatter_kws={'s':marker_size})\nplt.ylim((0, 50000))\n\n\n\n\n\n\n\n\n\n\nAltair (could not get simpler!)\n\nfrom altair import Chart\n\nchart = Chart(midwest)\nchart.mark_circle().encode(\n x='area',\n y='poptotal',\n color='state',\n size='popdensity',\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nggplot\n\nfrom ggplot import *\n\nggplot(aes(x='area', y='poptotal', color='state', size='popdensity'), data=midwest) +\\\n geom_point() +\\\n theme_bw() +\\\n xlab(\"Area\") +\\\n ylab(\"Population\") +\\\n ggtitle(\"Area vs Population\")\n\n\n\n\n\n\n\n\nIt was great fun (and frustration) trying to make this plot. Still some bits like LOESS are not included in the visualisation I made. The best thing about this exercise was discovering Altair! Declarative visualisation looks so natural. Way to go declarative visualisation!" + "text": "A while back, I read this wonderful article called “Top 50 ggplot2 Visualizations - The Master List (With Full R Code)”. Many of the plots looked very useful. In this post, I’ll look at creating the first of the plot in Python (with the help of Stack Overflow).\nHere’s how the end result should look like.\n\nHow the final plot should look like\n\n\n\nAttributes of above plot\n\nX-Y scatter for area vs population\nColor by state\nMarker-size by population\n\nI’ll first use Pandas to create the plot. Pandas plotting capabilites are almost the first thing I use to create plots. Next, I’ll show how to use Seaborn to reduce some complexity. Lastly, I’ll use Altair, ggplot and Plotnine to show how it focuses on getting directly to the point, i.e. expressing the 3 required attributes!\n\n\nTLDR: Declarative visualisatio) is super useful!\n\n\nOriginal R code\n# install.packages(\"ggplot2\")\n# load package and data\noptions(scipen=999) # turn-off scientific notation like 1e+48\nlibrary(ggplot2)\ntheme_set(theme_bw()) # pre-set the bw theme.\ndata(\"midwest\", package = \"ggplot2\")\n# midwest <- read.csv(\"http://goo.gl/G1K41K\") # bkup data source\n\n# Scatterplot\ngg <- ggplot(midwest, aes(x=area, y=poptotal)) + \n geom_point(aes(col=state, size=popdensity)) + \n geom_smooth(method=\"loess\", se=F) + \n xlim(c(0, 0.1)) + \n ylim(c(0, 500000)) + \n labs(subtitle=\"Area Vs Population\", \n y=\"Population\", \n x=\"Area\", \n title=\"Scatterplot\", \n caption = \"Source: midwest\")\n\nplot(gg)\n\n%matplotlib inline\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport pandas as pd\n\n\n\nColor scheme (borrowed from Randy Olson’s website)\n\n# Tableau 20 Colors\ntableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), \n (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), \n (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), \n (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), \n (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]\n \n\n# Rescale to values between 0 and 1 \nfor i in range(len(tableau20)): \n r, g, b = tableau20[i] \n tableau20[i] = (r / 255., g / 255., b / 255.)\n\n\n\nGetting the data\n\nmidwest= pd.read_csv(\"http://goo.gl/G1K41K\") \n# Filtering\nmidwest= midwest[midwest.poptotal<50000]\n\n\nmidwest.head().loc[:, ['area'] ]\n\n\n\n\n\n\n\narea\n\n\n\n\n1\n0.014\n\n\n2\n0.022\n\n\n3\n0.017\n\n\n4\n0.018\n\n\n5\n0.050\n\n\n\n\n\n\n\n\n\nDefault Pandas scatter plot with marker size by population density\n\nmidwest.plot(kind='scatter', x='area', y='poptotal', ylim=((0, 50000)), xlim=((0., 0.1)), s=midwest['popdensity']*0.1)\n\n\n\n\n\n\n\n\nIf we just use the default Pandas scatter, we won’t get the colour by state. For that we wil group the dataframe by states and then scatter plot each group individually.\n\n\nComplete Pandas’ solution (hand-wavy at times!)\n\nfig, ax = plt.subplots()\ngroups = midwest.groupby('state')\ncolors = tableau20[::2]\n\n# Plotting each group \nfor i, (name, group) in enumerate(groups):\n group.plot(kind='scatter', x='area', y='poptotal', ylim=((0, 50000)), xlim=((0., 0.1)),\n s=10+group['popdensity']*0.1, # hand-wavy :(\n label=name, ax=ax, color=colors[i])\n\n# Legend for State colours\nlgd = ax.legend(numpoints=1, loc=1, borderpad=1, \n frameon=True, framealpha=0.9, title=\"state\")\nfor handle in lgd.legendHandles:\n handle.set_sizes([100.0])\n\n# Make a legend for popdensity. Hand-wavy. Error prone!\npws = (pd.cut(midwest['popdensity'], bins=4, retbins=True)[1]).round(0)\nfor pw in pws:\n plt.scatter([], [], s=(pw**2)/2e4, c=\"k\",label=str(pw))\n\nh, l = plt.gca().get_legend_handles_labels()\nplt.legend(h[5:], l[5:], labelspacing=1.2, title=\"popdensity\", borderpad=1, \n frameon=True, framealpha=0.9, loc=4, numpoints=1)\n\nplt.gca().add_artist(lgd)\n\n\n\n\n\n\n\n\n\n\nUsing Seaborn\nThe solution using Seaborn is slightly less complicated as we won’t need to write the code for plotting different states on different colours. However, the legend jugglery for markersize would still be required!\n\nsizes = [10, 40, 70, 100] \nmarker_size = pd.cut(midwest['popdensity'], range(0, 2500, 500), labels=sizes) \nsns.lmplot('area', 'poptotal', data=midwest, hue='state', fit_reg=False, scatter_kws={'s':marker_size})\nplt.ylim((0, 50000))\n\n\n\n\n\n\n\n\n\n\nAltair (could not get simpler!)\n\nfrom altair import Chart\n\nchart = Chart(midwest)\nchart.mark_circle().encode(\n x='area',\n y='poptotal',\n color='state',\n size='popdensity',\n)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nggplot\n\nfrom ggplot import *\n\nggplot(aes(x='area', y='poptotal', color='state', size='popdensity'), data=midwest) +\\\n geom_point() +\\\n theme_bw() +\\\n xlab(\"Area\") +\\\n ylab(\"Population\") +\\\n ggtitle(\"Area vs Population\")\n\n\n\n\n\n\n\n\nIt was great fun (and frustration) trying to make this plot. Still some bits like LOESS are not included in the visualisation I made. The best thing about this exercise was discovering Altair! Declarative visualisation looks so natural. Way to go declarative visualisation!" }, { "objectID": "posts/object-detection.html", @@ -711,35 +718,35 @@ "href": "posts/2023-01-19-conformal-intro.html", "title": "Conformal Prediction", "section": "", - "text": "This is a work in progress. I will be adding more content to this post in the coming days.\nReference: https://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_multiclass.html#sphx-glr-auto-examples-calibration-plot-calibration-multiclass-py\n\nimport sklearn\nimport pandas as pd\n\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\nimport numpy as np\nfrom sklearn.datasets import make_blobs\n\nnp.random.seed(0)\n\nX, y = make_blobs(\n n_samples=2000, n_features=2, centers=3, random_state=42, cluster_std=5.0\n)\nX_train, y_train = X[:600], y[:600]\nX_valid, y_valid = X[600:1000], y[600:1000]\nX_train_valid, y_train_valid = X[:1000], y[:1000]\nX_test, y_test = X[1000:], y[1000:]\n\n\n# Scater plot showing different classes in different colors\nplt.scatter(X[:, 0], X[:, 1], c=y ,alpha=0.7)\n\n\n\n\n\n\n\n\n\nfrom sklearn.linear_model import LogisticRegression\nlr = LogisticRegression()\nlr.fit(X_train, y_train)\n\nLogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.LogisticRegressionLogisticRegression()\n\n\n\nprob_df = pd.DataFrame(lr.predict_proba(X_valid))\nprob_df.columns = lr.classes_\nprob_df.head()\n\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\n0\n0.014323\n0.959135\n0.026542\n\n\n1\n0.000326\n0.004617\n0.995057\n\n\n2\n0.667887\n0.322486\n0.009627\n\n\n3\n0.953779\n0.043703\n0.002518\n\n\n4\n0.000029\n0.000130\n0.999841\n\n\n\n\n\n\n\n\n\npd.Series(prob_df.values[np.arange(400), y_valid]).quantile(0.1)\n\n0.3934260593598625\n\n\n\n# Get the predicted probability for the correct class for each sample\ny_valid\n\narray([1, 2, 0, 0, 2, 2, 2, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2,\n 1, 0, 0, 2, 0, 0, 1, 2, 0, 1, 2, 0, 0, 2, 1, 2, 0, 1, 1, 0, 0, 1,\n 0, 0, 2, 2, 1, 1, 0, 0, 0, 1, 2, 2, 2, 1, 0, 1, 1, 1, 2, 0, 1, 1,\n 0, 1, 1, 2, 2, 1, 0, 1, 1, 0, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1,\n 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2, 0, 2, 0, 2, 1, 0, 0, 1,\n 2, 2, 2, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 0, 1, 1, 2, 2, 1, 1, 2, 2,\n 0, 0, 0, 0, 0, 0, 2, 0, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 2, 0, 1, 0,\n 1, 0, 2, 2, 0, 0, 2, 1, 0, 2, 0, 2, 0, 0, 0, 1, 2, 0, 1, 2, 0, 2,\n 1, 0, 0, 0, 0, 2, 2, 1, 0, 2, 1, 1, 2, 0, 2, 0, 1, 2, 1, 1, 0, 0,\n 2, 0, 1, 1, 1, 1, 0, 2, 2, 1, 1, 1, 0, 2, 1, 2, 2, 2, 1, 0, 0, 2,\n 0, 0, 2, 2, 0, 2, 2, 2, 0, 1, 2, 0, 2, 0, 1, 0, 2, 2, 2, 1, 0, 1,\n 1, 2, 2, 0, 2, 2, 2, 2, 0, 2, 1, 0, 1, 0, 1, 1, 1, 0, 2, 0, 2, 1,\n 0, 1, 0, 1, 2, 0, 1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 2, 1, 1, 2, 0,\n 1, 0, 1, 2, 0, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 0, 1, 1, 2,\n 1, 2, 2, 0, 2, 2, 2, 0, 1, 2, 1, 0, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2,\n 0, 1, 2, 0, 2, 1, 2, 0, 0, 0, 2, 1, 0, 1, 0, 2, 1, 0, 1, 2, 0, 1,\n 0, 1, 0, 2, 1, 1, 1, 2, 2, 0, 2, 2, 2, 1, 2, 1, 2, 2, 0, 0, 2, 2,\n 0, 1, 1, 0, 1, 2, 0, 1, 1, 2, 1, 0, 1, 0, 0, 2, 2, 0, 0, 1, 0, 0,\n 2, 2, 2, 2])\n\n\n\n# Get the predicted probability for the correct class for each sample\n\n\n\n\n\n\n\n\n\n1\n2\n0\n0\n2\n2\n2\n1\n1\n2\n...\n2\n0\n0\n1\n0\n0\n2\n2\n2\n2\n\n\n\n\n0\n0.959135\n0.026542\n0.014323\n0.014323\n0.026542\n0.026542\n0.026542\n0.959135\n0.959135\n0.026542\n...\n0.026542\n0.014323\n0.014323\n0.959135\n0.014323\n0.014323\n0.026542\n0.026542\n0.026542\n0.026542\n\n\n1\n0.004617\n0.995057\n0.000326\n0.000326\n0.995057\n0.995057\n0.995057\n0.004617\n0.004617\n0.995057\n...\n0.995057\n0.000326\n0.000326\n0.004617\n0.000326\n0.000326\n0.995057\n0.995057\n0.995057\n0.995057\n\n\n2\n0.322486\n0.009627\n0.667887\n0.667887\n0.009627\n0.009627\n0.009627\n0.322486\n0.322486\n0.009627\n...\n0.009627\n0.667887\n0.667887\n0.322486\n0.667887\n0.667887\n0.009627\n0.009627\n0.009627\n0.009627\n\n\n3\n0.043703\n0.002518\n0.953779\n0.953779\n0.002518\n0.002518\n0.002518\n0.043703\n0.043703\n0.002518\n...\n0.002518\n0.953779\n0.953779\n0.043703\n0.953779\n0.953779\n0.002518\n0.002518\n0.002518\n0.002518\n\n\n4\n0.000130\n0.999841\n0.000029\n0.000029\n0.999841\n0.999841\n0.999841\n0.000130\n0.000130\n0.999841\n...\n0.999841\n0.000029\n0.000029\n0.000130\n0.000029\n0.000029\n0.999841\n0.999841\n0.999841\n0.999841\n\n\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n\n\n395\n0.253215\n0.038669\n0.708116\n0.708116\n0.038669\n0.038669\n0.038669\n0.253215\n0.253215\n0.038669\n...\n0.038669\n0.708116\n0.708116\n0.253215\n0.708116\n0.708116\n0.038669\n0.038669\n0.038669\n0.038669\n\n\n396\n0.000339\n0.999576\n0.000086\n0.000086\n0.999576\n0.999576\n0.999576\n0.000339\n0.000339\n0.999576\n...\n0.999576\n0.000086\n0.000086\n0.000339\n0.000086\n0.000086\n0.999576\n0.999576\n0.999576\n0.999576\n\n\n397\n0.019843\n0.980018\n0.000139\n0.000139\n0.980018\n0.980018\n0.980018\n0.019843\n0.019843\n0.980018\n...\n0.980018\n0.000139\n0.000139\n0.019843\n0.000139\n0.000139\n0.980018\n0.980018\n0.980018\n0.980018\n\n\n398\n0.000094\n0.999780\n0.000126\n0.000126\n0.999780\n0.999780\n0.999780\n0.000094\n0.000094\n0.999780\n...\n0.999780\n0.000126\n0.000126\n0.000094\n0.000126\n0.000126\n0.999780\n0.999780\n0.999780\n0.999780\n\n\n399\n0.000133\n0.999776\n0.000092\n0.000092\n0.999776\n0.999776\n0.999776\n0.000133\n0.000133\n0.999776\n...\n0.999776\n0.000092\n0.000092\n0.000133\n0.000092\n0.000092\n0.999776\n0.999776\n0.999776\n0.999776\n\n\n\n\n400 rows × 400 columns" + "text": "This is a work in progress. I will be adding more content to this post in the coming days.\nReference: https://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_multiclass.html#sphx-glr-auto-examples-calibration-plot-calibration-multiclass-py\n\nimport sklearn\nimport pandas as pd\n\nimport matplotlib.pyplot as plt\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\nimport numpy as np\nfrom sklearn.datasets import make_blobs\n\nnp.random.seed(0)\n\nX, y = make_blobs(\n n_samples=2000, n_features=2, centers=3, random_state=42, cluster_std=5.0\n)\nX_train, y_train = X[:600], y[:600]\nX_valid, y_valid = X[600:1000], y[600:1000]\nX_train_valid, y_train_valid = X[:1000], y[:1000]\nX_test, y_test = X[1000:], y[1000:]\n\n\n# Scater plot showing different classes in different colors\nplt.scatter(X[:, 0], X[:, 1], c=y ,alpha=0.7)\n\n\n\n\n\n\n\n\n\nfrom sklearn.linear_model import LogisticRegression\nlr = LogisticRegression()\nlr.fit(X_train, y_train)\n\nLogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.LogisticRegressionLogisticRegression()\n\n\n\nprob_df = pd.DataFrame(lr.predict_proba(X_valid))\nprob_df.columns = lr.classes_\nprob_df.head()\n\n\n\n\n\n\n\n\n0\n1\n2\n\n\n\n\n0\n0.014323\n0.959135\n0.026542\n\n\n1\n0.000326\n0.004617\n0.995057\n\n\n2\n0.667887\n0.322486\n0.009627\n\n\n3\n0.953779\n0.043703\n0.002518\n\n\n4\n0.000029\n0.000130\n0.999841\n\n\n\n\n\n\n\n\npd.Series(prob_df.values[np.arange(400), y_valid]).quantile(0.1)\n\n0.3934260593598625\n\n\n\n# Get the predicted probability for the correct class for each sample\ny_valid\n\narray([1, 2, 0, 0, 2, 2, 2, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2,\n 1, 0, 0, 2, 0, 0, 1, 2, 0, 1, 2, 0, 0, 2, 1, 2, 0, 1, 1, 0, 0, 1,\n 0, 0, 2, 2, 1, 1, 0, 0, 0, 1, 2, 2, 2, 1, 0, 1, 1, 1, 2, 0, 1, 1,\n 0, 1, 1, 2, 2, 1, 0, 1, 1, 0, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1,\n 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2, 0, 2, 0, 2, 1, 0, 0, 1,\n 2, 2, 2, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 0, 1, 1, 2, 2, 1, 1, 2, 2,\n 0, 0, 0, 0, 0, 0, 2, 0, 1, 1, 1, 2, 0, 2, 0, 1, 1, 0, 2, 0, 1, 0,\n 1, 0, 2, 2, 0, 0, 2, 1, 0, 2, 0, 2, 0, 0, 0, 1, 2, 0, 1, 2, 0, 2,\n 1, 0, 0, 0, 0, 2, 2, 1, 0, 2, 1, 1, 2, 0, 2, 0, 1, 2, 1, 1, 0, 0,\n 2, 0, 1, 1, 1, 1, 0, 2, 2, 1, 1, 1, 0, 2, 1, 2, 2, 2, 1, 0, 0, 2,\n 0, 0, 2, 2, 0, 2, 2, 2, 0, 1, 2, 0, 2, 0, 1, 0, 2, 2, 2, 1, 0, 1,\n 1, 2, 2, 0, 2, 2, 2, 2, 0, 2, 1, 0, 1, 0, 1, 1, 1, 0, 2, 0, 2, 1,\n 0, 1, 0, 1, 2, 0, 1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 2, 1, 1, 2, 0,\n 1, 0, 1, 2, 0, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 0, 1, 1, 2,\n 1, 2, 2, 0, 2, 2, 2, 0, 1, 2, 1, 0, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2,\n 0, 1, 2, 0, 2, 1, 2, 0, 0, 0, 2, 1, 0, 1, 0, 2, 1, 0, 1, 2, 0, 1,\n 0, 1, 0, 2, 1, 1, 1, 2, 2, 0, 2, 2, 2, 1, 2, 1, 2, 2, 0, 0, 2, 2,\n 0, 1, 1, 0, 1, 2, 0, 1, 1, 2, 1, 0, 1, 0, 0, 2, 2, 0, 0, 1, 0, 0,\n 2, 2, 2, 2])\n\n\n\n# Get the predicted probability for the correct class for each sample\n\n\n\n\n\n\n\n\n1\n2\n0\n0\n2\n2\n2\n1\n1\n2\n...\n2\n0\n0\n1\n0\n0\n2\n2\n2\n2\n\n\n\n\n0\n0.959135\n0.026542\n0.014323\n0.014323\n0.026542\n0.026542\n0.026542\n0.959135\n0.959135\n0.026542\n...\n0.026542\n0.014323\n0.014323\n0.959135\n0.014323\n0.014323\n0.026542\n0.026542\n0.026542\n0.026542\n\n\n1\n0.004617\n0.995057\n0.000326\n0.000326\n0.995057\n0.995057\n0.995057\n0.004617\n0.004617\n0.995057\n...\n0.995057\n0.000326\n0.000326\n0.004617\n0.000326\n0.000326\n0.995057\n0.995057\n0.995057\n0.995057\n\n\n2\n0.322486\n0.009627\n0.667887\n0.667887\n0.009627\n0.009627\n0.009627\n0.322486\n0.322486\n0.009627\n...\n0.009627\n0.667887\n0.667887\n0.322486\n0.667887\n0.667887\n0.009627\n0.009627\n0.009627\n0.009627\n\n\n3\n0.043703\n0.002518\n0.953779\n0.953779\n0.002518\n0.002518\n0.002518\n0.043703\n0.043703\n0.002518\n...\n0.002518\n0.953779\n0.953779\n0.043703\n0.953779\n0.953779\n0.002518\n0.002518\n0.002518\n0.002518\n\n\n4\n0.000130\n0.999841\n0.000029\n0.000029\n0.999841\n0.999841\n0.999841\n0.000130\n0.000130\n0.999841\n...\n0.999841\n0.000029\n0.000029\n0.000130\n0.000029\n0.000029\n0.999841\n0.999841\n0.999841\n0.999841\n\n\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n\n\n395\n0.253215\n0.038669\n0.708116\n0.708116\n0.038669\n0.038669\n0.038669\n0.253215\n0.253215\n0.038669\n...\n0.038669\n0.708116\n0.708116\n0.253215\n0.708116\n0.708116\n0.038669\n0.038669\n0.038669\n0.038669\n\n\n396\n0.000339\n0.999576\n0.000086\n0.000086\n0.999576\n0.999576\n0.999576\n0.000339\n0.000339\n0.999576\n...\n0.999576\n0.000086\n0.000086\n0.000339\n0.000086\n0.000086\n0.999576\n0.999576\n0.999576\n0.999576\n\n\n397\n0.019843\n0.980018\n0.000139\n0.000139\n0.980018\n0.980018\n0.980018\n0.019843\n0.019843\n0.980018\n...\n0.980018\n0.000139\n0.000139\n0.019843\n0.000139\n0.000139\n0.980018\n0.980018\n0.980018\n0.980018\n\n\n398\n0.000094\n0.999780\n0.000126\n0.000126\n0.999780\n0.999780\n0.999780\n0.000094\n0.000094\n0.999780\n...\n0.999780\n0.000126\n0.000126\n0.000094\n0.000126\n0.000126\n0.999780\n0.999780\n0.999780\n0.999780\n\n\n399\n0.000133\n0.999776\n0.000092\n0.000092\n0.999776\n0.999776\n0.999776\n0.000133\n0.000133\n0.999776\n...\n0.999776\n0.000092\n0.000092\n0.000133\n0.000092\n0.000092\n0.999776\n0.999776\n0.999776\n0.999776\n\n\n\n\n400 rows × 400 columns" }, { "objectID": "posts/siren-paper-impl.html", "href": "posts/siren-paper-impl.html", "title": "SIREN paper implementation", "section": "", - "text": "TLDR: Sine activation function is better than ReLU for reconstructing images\n\n\n\n\n\n\n\n\n\nReconstruction from ReLU\n\n\n\n\n\n\n\nReconstruction from Sine\n\n\n\n\n\n\n\nAnimation of the training process\n\n\n\n\n\n\n\n\n\nReconstruction from ReLU\n\n\n\n\n\n\n\nReconstruction from Sine\n\n\n\n\n\n\nimport torch\nimport torchvision\nimport torchvision.transforms as transforms\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n# Remove all the warnings\nimport warnings\nwarnings.filterwarnings('ignore')\n\n# Set env CUDA_LAUNCH_BLOCKING=1\nimport os\nos.environ['CUDA_LAUNCH_BLOCKING'] = '1'\n\n\n!wget https://segment-anything.com/assets/gallery/AdobeStock_94274587_welsh_corgi_pembroke_CD.jpg -O dog.jpg\n\n--2023-04-27 17:21:53-- https://segment-anything.com/assets/gallery/AdobeStock_94274587_welsh_corgi_pembroke_CD.jpg\nResolving segment-anything.com (segment-anything.com)... 108.138.128.23, 108.138.128.8, 108.138.128.34, ...\nConnecting to segment-anything.com (segment-anything.com)|108.138.128.23|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 221810 (217K) [image/jpeg]\nSaving to: ‘dog.jpg’\n\ndog.jpg 100%[===================>] 216.61K 400KB/s in 0.5s \n\n2023-04-27 17:21:55 (400 KB/s) - ‘dog.jpg’ saved [221810/221810]\n\n\n\n\n# Read in a image from torchvision\nimg = torchvision.io.read_image(\"dog.jpg\")\n\n\nplt.imshow(img.permute(1, 2, 0))\n\n\n\n\n\n\n\n\n\n# Normalize the image\nimg = img / 255.0\n\n\nimg.shape\n\ntorch.Size([3, 1365, 2048])\n\n\n\n# Take a random 224x224 crop of the image\ncrop = torchvision.transforms.functional.crop(img, 600, 750, 400, 400)\n\n\n# Plot the crop\nplt.imshow(crop.permute(1, 2, 0))\n\n\n\n\n\n\n\n\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n\n\n# Get the dimensions of the image tensor\nnum_channels, height, width = crop.shape\n\n# Create a 2D grid of (x,y) coordinates\nx_coords = torch.arange(width).repeat(height, 1)\ny_coords = torch.arange(height).repeat(width, 1).t()\nx_coords = x_coords.reshape(-1)\ny_coords = y_coords.reshape(-1)\n\n# Combine the x and y coordinates into a single tensor\nX = torch.stack([x_coords, y_coords], dim=1).float()\n\n# Move X to GPU if available\nX = X.to(device)\n\n\nnum_xy = height * width\nnum_xy\n\n160000\n\n\n\nX.shape, X\n\n(torch.Size([160000, 2]),\n tensor([[ 0., 0.],\n [ 1., 0.],\n [ 2., 0.],\n ...,\n [397., 399.],\n [398., 399.],\n [399., 399.]], device='cuda:0'))\n\n\n\n# Extract pixel values from image tensor\npixel_values = crop.reshape(num_channels, -1).float().to(device)\n\n# Transpose the pixel values to be (num_xy, num_channels)\npixel_values = pixel_values.transpose(0, 1)\n\ny = pixel_values.to(device)\n\n\n# Create a MLP with 5 hidden layers with 256 neurons each and ReLU activations.\n# Input is (x, y) and output is (r, g, b)\n\nclass MLP(nn.Module):\n def __init__(self):\n super().__init__()\n self.fc1 = nn.Linear(2, 256)\n self.fc2 = nn.Linear(256, 256)\n self.fc3 = nn.Linear(256, 256)\n self.fc4 = nn.Linear(256, 256)\n self.fc5 = nn.Linear(256, 256)\n self.fc6 = nn.Linear(256, 3)\n\n def forward(self, x):\n x = F.relu(self.fc1(x))\n x = F.relu(self.fc2(x))\n x = F.relu(self.fc3(x))\n x = F.relu(self.fc4(x))\n x = F.relu(self.fc5(x))\n return self.fc6(x)\n\n\n# Training loop function to train the model\n# X: (num_xy, 2) tensor of (x, y) coordinates\n# y: (num_xy, 3) tensor of (r, g, b) pixel values\n# model: MLP model\n# lr: learning rate\n# epochs: number of epochs to train for\n# bs: batch size\n# print_every: print loss every print_every epochs\n# Logs losses\n# Saves the prediction frmo model every print_every epochs\n\ndef train(X, y, model, lr=0.01, epochs=1000, bs=1000, print_every=100):\n losses = []\n imgs = []\n optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n criterion = nn.MSELoss()\n for epoch in range(epochs):\n # Get a random batch of (x, y) coordinates\n idxs = torch.randperm(num_xy)[:bs]\n batch_X = X[idxs]\n batch_y = y[idxs]\n\n # Predict the (r, g, b) values\n pred_y = model(batch_X)\n\n # Compute the loss\n loss = criterion(pred_y, batch_y)\n\n # Zero gradients, perform a backward pass, and update the weights\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n losses.append(loss.item())\n\n # Print loss every print_every epochs\n if epoch % print_every == 0:\n print(f\"Epoch {epoch} loss: {loss.item()}\")\n with torch.no_grad():\n # Predict the (r, g, b) values\n pred_y = model(X)\n\n # Reshape the predictions to be (3, height, width)\n pred_y = pred_y.transpose(0, 1).reshape(num_channels, height, width)\n imgs.append(pred_y.permute(1, 2, 0).detach().cpu())\n \n return losses, imgs\n\n\nm1 = MLP()\nm1 = m1.to(device)\nlosses_mlp, imgs = train(X, y, m1, lr=0.001, epochs=4000, bs=2000, print_every=100)\n\nEpoch 0 loss: 1.5234602689743042\nEpoch 100 loss: 0.0640626773238182\nEpoch 200 loss: 0.04388527199625969\nEpoch 300 loss: 0.03277464583516121\nEpoch 400 loss: 0.03183111175894737\nEpoch 500 loss: 0.02485758438706398\nEpoch 600 loss: 0.023289738222956657\nEpoch 700 loss: 0.024606380611658096\nEpoch 800 loss: 0.023782318457961082\nEpoch 900 loss: 0.026350615546107292\nEpoch 1000 loss: 0.025088826194405556\nEpoch 1100 loss: 0.023389095440506935\nEpoch 1200 loss: 0.02370390295982361\nEpoch 1300 loss: 0.023111725226044655\nEpoch 1400 loss: 0.023864751681685448\nEpoch 1500 loss: 0.021725382655858994\nEpoch 1600 loss: 0.021787280216813087\nEpoch 1700 loss: 0.021760988980531693\nEpoch 1800 loss: 0.021614212542772293\nEpoch 1900 loss: 0.020562106743454933\nEpoch 2000 loss: 0.019880816340446472\nEpoch 2100 loss: 0.01901845820248127\nEpoch 2200 loss: 0.018372364342212677\nEpoch 2300 loss: 0.01828525774180889\nEpoch 2400 loss: 0.018451901152729988\nEpoch 2500 loss: 0.01738181710243225\nEpoch 2600 loss: 0.01698809117078781\nEpoch 2700 loss: 0.01643018051981926\nEpoch 2800 loss: 0.01669265516102314\nEpoch 2900 loss: 0.01664060726761818\nEpoch 3000 loss: 0.01606595516204834\nEpoch 3100 loss: 0.01667209528386593\nEpoch 3200 loss: 0.015133237466216087\nEpoch 3300 loss: 0.014814447611570358\nEpoch 3400 loss: 0.01538220327347517\nEpoch 3500 loss: 0.01484852284193039\nEpoch 3600 loss: 0.01589234732091427\nEpoch 3700 loss: 0.014897373504936695\nEpoch 3800 loss: 0.014240250922739506\nEpoch 3900 loss: 0.015261288732290268\n\n\n\ndef plot_image(model, name=None):\n # Predict the (r, g, b) values\n pred_y = model(X)\n\n # Reshape the predictions to be (3, height, width)\n pred_y = pred_y.transpose(0, 1).reshape(num_channels, height, width)\n\n # plot the image\n plt.imshow(pred_y.permute(1, 2, 0).detach().cpu())\n if name:\n plt.savefig(name)\n\n\nplot_image(m1, \"mlp_dog.png\")\n\nClipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n\n\n\n\n\n\n\n\n\n\n# Create the animation from imgs and save it as a gif\n\nimport imageio\nimageio.mimsave('mlp.gif', imgs, fps=10)\n\nLossy conversion from float32 to uint8. Range [-13.466928482055664, 2.713646650314331]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.18658676743507385, 1.3069090843200684]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.18308542668819427, 1.0001248121261597]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.07874367386102676, 1.0167515277862549]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.09477106481790543, 1.0060935020446777]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.033188510686159134, 1.0109848976135254]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.0989738255739212, 1.0007272958755493]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.04943906515836716, 1.0269501209259033]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.02097826451063156, 1.0289174318313599]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.029821299016475677, 1.0194318294525146]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.016834549605846405, 1.0527536869049072]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.008144930005073547, 1.0191292762756348]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.009020708501338959, 1.0909096002578735]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.017141804099082947, 1.0371521711349487]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.013367637991905212, 1.0438421964645386]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.0005456805229187012, 1.0179295539855957]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.011109575629234314, 1.0290166139602661]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.020140215754508972, 1.078523874282837]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.0396433025598526, 1.0415352582931519]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.015714898705482483, 1.0283904075622559]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.04321514815092087, 1.0413591861724854]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.04679575562477112, 1.067355990409851]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.003602549433708191, 1.0755447149276733]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.007610529661178589, 1.052262306213379]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.033921219408512115, 1.0815953016281128]. Convert image to uint8 prior to saving to suppress this warning.\n\n\n\n\n# Create a MLP with 5 hidden layers with 256 neurons each and sine activations.\n# Input is (x, y) and output is (r, g, b)\n\nclass MLP_sin(nn.Module):\n def __init__(self):\n super().__init__()\n self.fc1 = nn.Linear(2, 256)\n self.fc2 = nn.Linear(256, 256)\n self.fc3 = nn.Linear(256, 256)\n self.fc4 = nn.Linear(256, 256)\n self.fc5 = nn.Linear(256, 256)\n self.fc6 = nn.Linear(256, 3)\n\n def forward(self, x):\n x = torch.sin(self.fc1(x))\n x = torch.sin(self.fc2(x))\n x = torch.sin(self.fc3(x))\n x = torch.sin(self.fc4(x))\n x = torch.sin(self.fc5(x))\n return self.fc6(x)\n\n\nm2 = MLP_sin()\nm2 = m2.to(device)\nlosses_mlp_sin, imgs = train(X, y, m2, lr=0.001, epochs=4000, bs=1000, print_every=100)\n\nEpoch 0 loss: 0.40150442719459534\nEpoch 100 loss: 0.03298206627368927\nEpoch 200 loss: 0.033279214054346085\nEpoch 300 loss: 0.03175220638513565\nEpoch 400 loss: 0.03205806389451027\nEpoch 500 loss: 0.03196191042661667\nEpoch 600 loss: 0.02972976118326187\nEpoch 700 loss: 0.029925711452960968\nEpoch 800 loss: 0.02968132309615612\nEpoch 900 loss: 0.028653116896748543\nEpoch 1000 loss: 0.02474542148411274\nEpoch 1100 loss: 0.020879685878753662\nEpoch 1200 loss: 0.019819265231490135\nEpoch 1300 loss: 0.016965048387646675\nEpoch 1400 loss: 0.013934656977653503\nEpoch 1500 loss: 0.011689499020576477\nEpoch 1600 loss: 0.010081701911985874\nEpoch 1700 loss: 0.007140354719012976\nEpoch 1800 loss: 0.006480662152171135\nEpoch 1900 loss: 0.005266484338790178\nEpoch 2000 loss: 0.004757172428071499\nEpoch 2100 loss: 0.003453798359259963\nEpoch 2200 loss: 0.0032651633955538273\nEpoch 2300 loss: 0.0028410402592271566\nEpoch 2400 loss: 0.0026403532829135656\nEpoch 2500 loss: 0.0019292739452794194\nEpoch 2600 loss: 0.0021367412991821766\nEpoch 2700 loss: 0.0020427301060408354\nEpoch 2800 loss: 0.0017756932647898793\nEpoch 2900 loss: 0.0016549285501241684\nEpoch 3000 loss: 0.0016728530172258615\nEpoch 3100 loss: 0.001471961266361177\nEpoch 3200 loss: 0.0014844941906630993\nEpoch 3300 loss: 0.0014798615593463182\nEpoch 3400 loss: 0.0012664658715948462\nEpoch 3500 loss: 0.0012708695139735937\nEpoch 3600 loss: 0.0012460555881261826\nEpoch 3700 loss: 0.0012855605455115438\nEpoch 3800 loss: 0.001190435141324997\nEpoch 3900 loss: 0.0011714434949681163\n\n\n\nplot_image(m2, \"mlp_sin_dog.png\")\n\nClipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n\n\n\n\n\n\n\n\n\n\nimageio.mimsave('mlp_sin.gif', imgs, fps=10)\n\nLossy conversion from float32 to uint8. Range [-0.1441832184791565, 0.3080734610557556]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.056166477501392365, 0.9270500540733337]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.04645712673664093, 0.9617018103599548]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08092432469129562, 0.9469475746154785]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.13254448771476746, 1.0228846073150635]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.18537408113479614, 1.0271779298782349]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.15940740704536438, 1.069307804107666]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.1629665046930313, 1.0901581048965454]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.17787247896194458, 1.164113163948059]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.23600360751152039, 1.1689845323562622]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.1829279065132141, 1.1432479619979858]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.12739746272563934, 1.1281737089157104]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11645704507827759, 1.1141674518585205]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11797109246253967, 1.1277530193328857]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09862736612558365, 1.0859858989715576]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.1146015003323555, 1.099491834640503]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09405502676963806, 1.1023061275482178]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.132747620344162, 1.0877472162246704]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11511929333209991, 1.0887328386306763]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11015606671571732, 1.0807398557662964]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09713895618915558, 1.087331771850586]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.0733504444360733, 1.0549205541610718]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.07674040645360947, 1.0766404867172241]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.07997756451368332, 1.0550076961517334]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09363748133182526, 1.056591510772705]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08970168232917786, 1.0528484582901]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08736599236726761, 1.04934561252594]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08859498053789139, 1.0708154439926147]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08006224036216736, 1.0856648683547974]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08170387893915176, 1.071043610572815]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.06969650834798813, 1.0583616495132446]. Convert image to uint8 prior to saving to suppress this warning.\n\n\n\n\n# Audio\n!wget https://www.vincentsitzmann.com/siren/img/audio/gt_bach.wav\n\n--2023-04-28 14:24:10-- https://www.vincentsitzmann.com/siren/img/audio/gt_bach.wav\nResolving www.vincentsitzmann.com (www.vincentsitzmann.com)... 185.199.111.153, 185.199.108.153, 185.199.110.153, ...\nConnecting to www.vincentsitzmann.com (www.vincentsitzmann.com)|185.199.111.153|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 1232886 (1.2M) [audio/wav]\nSaving to: ‘gt_bach.wav.3’\n\ngt_bach.wav.3 100%[===================>] 1.17M --.-KB/s in 0.06s \n\n2023-04-28 14:24:10 (19.7 MB/s) - ‘gt_bach.wav.3’ saved [1232886/1232886]\n\n\n\n\n# CLear CUDA cache\ntorch.cuda.empty_cache()\n\n\nfrom IPython.display import Audio\nAudio('gt_bach.wav')\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n# Read the audio file\nimport torchaudio\naudio, sr = torchaudio.load('gt_bach.wav')\n\n\nsr\n\n44100\n\n\n\naudio.shape\naudio = audio[0]\naudio = audio.to(device)\n\n\n# use last 2 seconds of audio\naudio = audio[-2 * sr:]\nX = torch.arange(0, len(audio)).unsqueeze(1).float().to(device)\n\n# Rescale X between -10 and 10\nX = X / X.max() * 20 - 10\nX.min(), X.max()\n\n(tensor(-10., device='cuda:0'), tensor(10., device='cuda:0'))\n\n\n\nX.shape, audio.shape, X\n\n(torch.Size([88200, 1]),\n torch.Size([88200]),\n tensor([[-10.0000],\n [ -9.9998],\n [ -9.9995],\n ...,\n [ 9.9995],\n [ 9.9998],\n [ 10.0000]], device='cuda:0'))\n\n\n\nAudio(audio.cpu(), rate=sr)\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nclass SinActivation(torch.nn.Module):\n def __init__(self):\n super(SinActivation, self).__init__()\n return\n def forward(self, x):\n return torch.sin(x)\n \n\nclass SinActivation30(torch.nn.Module):\n def __init__(self):\n super(SinActivation30, self).__init__()\n return\n def forward(self, x):\n return torch.sin(30*x)\n\n\nimport torch.nn as nn\n\ndef create_mlp(n, m, f):\n \"\"\"\n n: number of hidden layers\n m: number of neurons in each hidden layer\n f: activation function\n ---\n Weighing initialization: \n uniform distribution between -30/input_dim and 30/input_dim for first layer\n -sqrt(6/input_dim) and sqrt(6/input_dim) for the rest\n\n Weight init is done in the forward pass\n \"\"\"\n\n layers = []\n layer1 = nn.Linear(1, m)\n torch.nn.init.uniform_(layer1.weight, a=-1/1, b=1/1)\n #torch.nn.init.uniform_(layer1.bias, a=-1/1, b=1/1)\n layers.append(layer1)\n layers.append(SinActivation30())\n for i in range(n):\n layer_i = nn.Linear(m, m)\n # Uniform distribution between -sqrt(6/input_dim) and sqrt(6/input_dim)\n torch.nn.init.uniform_(layer_i.weight, a=-np.sqrt(6/m), b=np.sqrt(6/m))\n torch.nn.init.uniform_(layer_i.bias, a=-np.sqrt(6/m), b=np.sqrt(6/m))\n layers.append(layer_i)\n layers.append(f)\n layers.append(nn.Linear(m, 1))\n\n return nn.Sequential(*layers)\n\n\nmlp_audio_sin_5_256 = create_mlp(5, 256, SinActivation()).to(device)\n#mlp_audio_sin_8_512 = create_mlp(8, 512, SinActivation()).to(device)\n#mlp_audio_sin_3_128 = create_mlp(3, 128, SinActivation()).to(device)\n\n\nmlp_audio_sin_5_128\n\nNameError: name 'mlp_audio_sin_5_128' is not defined\n\n\n\ndef train_audio(X, y, model, lr=0.01, epochs=1000, bs=1000, print_every=100):\n losses = []\n optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n criterion = nn.MSELoss()\n for epoch in range(epochs):\n num_rows = X.shape[0]\n idx = torch.randperm(num_rows)[:bs]\n batch_X = X[idx]\n batch_y = y[idx]\n pred_y = model(batch_X)\n\n # Compute the loss\n loss = criterion(pred_y, batch_y)\n\n # Zero gradients, perform a backward pass, and update the weights\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n losses.append(loss.item())\n\n # Print loss every print_every epochs\n if epoch % print_every == 0:\n print(f\"Epoch {epoch} loss: {loss.item()}\")\n\n return losses\n\n\n#losses_mlp_sin_3_128 = train_audio(X, audio, mlp_audio_sin_3_128, lr=0.0001,\n# epochs=5000, bs=len(X)//2, print_every=100)\n\nlosses_mlp_sin_5_256 = train_audio(X, audio, mlp_audio_sin_5_256, lr=0.0001,\n epochs=5000, bs=len(X)//2, print_every=100)\n\nEpoch 0 loss: 0.210729718208313\n\n\nOutOfMemoryError: CUDA out of memory. Tried to allocate 7.25 GiB (GPU 0; 79.18 GiB total capacity; 63.06 GiB already allocated; 7.88 MiB free; 74.24 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n\n\n\nX\n\ntensor([[-1.7320],\n [-1.7320],\n [-1.7319],\n ...,\n [ 1.7319],\n [ 1.7320],\n [ 1.7320]], device='cuda:0')\n\n\n\nimport time\n\n\na = time.time()\nlosses_mlp_sin_8_512 = train_audio(X, audio, mlp_audio_sin_8_512, \n lr=0.0001, epochs=10, bs=len(X), print_every=1)\nb = time.time()\nprint(b-a)\n\nOutOfMemoryError: CUDA out of memory. Tried to allocate 28.98 GiB (GPU 0; 79.18 GiB total capacity; 33.40 GiB already allocated; 14.51 GiB free; 59.74 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n\n\n\nmlp_audio_sin_8_512 = torch.compile(mlp_audio_sin_8_512)\n\n\na = time.time()\nlosses_mlp_sin_8_512 = train_audio(X, audio, mlp_audio_sin_8_512, \n lr=0.0001, epochs=10, bs=len(X), print_every=1)\nb = time.time()\nprint(b-a)\n\nNameError: name 'time' is not defined\n\n\n\n# Plot the reconstruction\nwith torch.no_grad():\n #pred_y_5_256 = mlp_audio_sin_5_256(X)\n #pred_y_8_512 = mlp_audio_sin_8_512(X)\n pred_y_3_128 = mlp_audio_sin_3_128(X)\n plt.plot(audio.cpu().numpy(), label=\"Ground truth\")\n #plt.plot(pred_y_5_256.cpu().numpy(), label=\"MLP 5 layers 256 neurons\")\n plt.plot(pred_y_3_128.cpu().numpy(), label=\"MLP 8 layers 512 neurons\")\n plt.legend()\n\n\n\n\n\n\n\n\n\nimport pandas as pd\n\ndf = pd.DataFrame({\"GT audio\": audio.cpu().numpy(), \n \"MLP 5 layers 256 neurons\": pred_y_5_256.cpu().numpy().flatten(), \n \"MLP 8 layers 512 neurons\": pred_y_8_512.cpu().numpy().flatten()})\n\n\ndf.describe()\n\n\n\n\n\n\n\n\n\nGT audio\nMLP 5 layers 256 neurons\nMLP 8 layers 512 neurons\n\n\n\n\ncount\n88200.000000\n88200.000000\n88200.000000\n\n\nmean\n0.000127\n-0.013929\n-0.010819\n\n\nstd\n0.208728\n0.025773\n0.156109\n\n\nmin\n-0.868308\n-0.083747\n-0.710084\n\n\n25%\n-0.130095\n-0.030821\n-0.116540\n\n\n50%\n-0.002093\n-0.011080\n-0.010339\n\n\n75%\n0.130701\n0.002974\n0.094733\n\n\nmax\n1.000000\n0.051832\n0.658187\n\n\n\n\n\n\n\n\n\naudio.shape, pred_y_8_512.shape\n\n(torch.Size([88200]), torch.Size([88200, 1]))\n\n\n\n# Play the reconstruction\nAudio(pred_y_8_512.cpu().T, rate=sr)\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n\nTODO\n\nShow the gradient of the reconstructed image for different activation functions" + "text": "TLDR: Sine activation function is better than ReLU for reconstructing images\n\n\n\n\n\n\n\n\n\nReconstruction from ReLU\n\n\n\n\n\n\n\nReconstruction from Sine\n\n\n\n\n\n\n\nAnimation of the training process\n\n\n\n\n\n\n\n\n\nReconstruction from ReLU\n\n\n\n\n\n\n\nReconstruction from Sine\n\n\n\n\n\n\nimport torch\nimport torchvision\nimport torchvision.transforms as transforms\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n# Remove all the warnings\nimport warnings\nwarnings.filterwarnings('ignore')\n\n# Set env CUDA_LAUNCH_BLOCKING=1\nimport os\nos.environ['CUDA_LAUNCH_BLOCKING'] = '1'\n\n\n!wget https://segment-anything.com/assets/gallery/AdobeStock_94274587_welsh_corgi_pembroke_CD.jpg -O dog.jpg\n\n--2023-04-27 17:21:53-- https://segment-anything.com/assets/gallery/AdobeStock_94274587_welsh_corgi_pembroke_CD.jpg\nResolving segment-anything.com (segment-anything.com)... 108.138.128.23, 108.138.128.8, 108.138.128.34, ...\nConnecting to segment-anything.com (segment-anything.com)|108.138.128.23|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 221810 (217K) [image/jpeg]\nSaving to: ‘dog.jpg’\n\ndog.jpg 100%[===================>] 216.61K 400KB/s in 0.5s \n\n2023-04-27 17:21:55 (400 KB/s) - ‘dog.jpg’ saved [221810/221810]\n\n\n\n\n# Read in a image from torchvision\nimg = torchvision.io.read_image(\"dog.jpg\")\n\n\nplt.imshow(img.permute(1, 2, 0))\n\n\n\n\n\n\n\n\n\n# Normalize the image\nimg = img / 255.0\n\n\nimg.shape\n\ntorch.Size([3, 1365, 2048])\n\n\n\n# Take a random 224x224 crop of the image\ncrop = torchvision.transforms.functional.crop(img, 600, 750, 400, 400)\n\n\n# Plot the crop\nplt.imshow(crop.permute(1, 2, 0))\n\n\n\n\n\n\n\n\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n\n\n# Get the dimensions of the image tensor\nnum_channels, height, width = crop.shape\n\n# Create a 2D grid of (x,y) coordinates\nx_coords = torch.arange(width).repeat(height, 1)\ny_coords = torch.arange(height).repeat(width, 1).t()\nx_coords = x_coords.reshape(-1)\ny_coords = y_coords.reshape(-1)\n\n# Combine the x and y coordinates into a single tensor\nX = torch.stack([x_coords, y_coords], dim=1).float()\n\n# Move X to GPU if available\nX = X.to(device)\n\n\nnum_xy = height * width\nnum_xy\n\n160000\n\n\n\nX.shape, X\n\n(torch.Size([160000, 2]),\n tensor([[ 0., 0.],\n [ 1., 0.],\n [ 2., 0.],\n ...,\n [397., 399.],\n [398., 399.],\n [399., 399.]], device='cuda:0'))\n\n\n\n# Extract pixel values from image tensor\npixel_values = crop.reshape(num_channels, -1).float().to(device)\n\n# Transpose the pixel values to be (num_xy, num_channels)\npixel_values = pixel_values.transpose(0, 1)\n\ny = pixel_values.to(device)\n\n\n# Create a MLP with 5 hidden layers with 256 neurons each and ReLU activations.\n# Input is (x, y) and output is (r, g, b)\n\nclass MLP(nn.Module):\n def __init__(self):\n super().__init__()\n self.fc1 = nn.Linear(2, 256)\n self.fc2 = nn.Linear(256, 256)\n self.fc3 = nn.Linear(256, 256)\n self.fc4 = nn.Linear(256, 256)\n self.fc5 = nn.Linear(256, 256)\n self.fc6 = nn.Linear(256, 3)\n\n def forward(self, x):\n x = F.relu(self.fc1(x))\n x = F.relu(self.fc2(x))\n x = F.relu(self.fc3(x))\n x = F.relu(self.fc4(x))\n x = F.relu(self.fc5(x))\n return self.fc6(x)\n\n\n# Training loop function to train the model\n# X: (num_xy, 2) tensor of (x, y) coordinates\n# y: (num_xy, 3) tensor of (r, g, b) pixel values\n# model: MLP model\n# lr: learning rate\n# epochs: number of epochs to train for\n# bs: batch size\n# print_every: print loss every print_every epochs\n# Logs losses\n# Saves the prediction frmo model every print_every epochs\n\ndef train(X, y, model, lr=0.01, epochs=1000, bs=1000, print_every=100):\n losses = []\n imgs = []\n optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n criterion = nn.MSELoss()\n for epoch in range(epochs):\n # Get a random batch of (x, y) coordinates\n idxs = torch.randperm(num_xy)[:bs]\n batch_X = X[idxs]\n batch_y = y[idxs]\n\n # Predict the (r, g, b) values\n pred_y = model(batch_X)\n\n # Compute the loss\n loss = criterion(pred_y, batch_y)\n\n # Zero gradients, perform a backward pass, and update the weights\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n losses.append(loss.item())\n\n # Print loss every print_every epochs\n if epoch % print_every == 0:\n print(f\"Epoch {epoch} loss: {loss.item()}\")\n with torch.no_grad():\n # Predict the (r, g, b) values\n pred_y = model(X)\n\n # Reshape the predictions to be (3, height, width)\n pred_y = pred_y.transpose(0, 1).reshape(num_channels, height, width)\n imgs.append(pred_y.permute(1, 2, 0).detach().cpu())\n \n return losses, imgs\n\n\nm1 = MLP()\nm1 = m1.to(device)\nlosses_mlp, imgs = train(X, y, m1, lr=0.001, epochs=4000, bs=2000, print_every=100)\n\nEpoch 0 loss: 1.5234602689743042\nEpoch 100 loss: 0.0640626773238182\nEpoch 200 loss: 0.04388527199625969\nEpoch 300 loss: 0.03277464583516121\nEpoch 400 loss: 0.03183111175894737\nEpoch 500 loss: 0.02485758438706398\nEpoch 600 loss: 0.023289738222956657\nEpoch 700 loss: 0.024606380611658096\nEpoch 800 loss: 0.023782318457961082\nEpoch 900 loss: 0.026350615546107292\nEpoch 1000 loss: 0.025088826194405556\nEpoch 1100 loss: 0.023389095440506935\nEpoch 1200 loss: 0.02370390295982361\nEpoch 1300 loss: 0.023111725226044655\nEpoch 1400 loss: 0.023864751681685448\nEpoch 1500 loss: 0.021725382655858994\nEpoch 1600 loss: 0.021787280216813087\nEpoch 1700 loss: 0.021760988980531693\nEpoch 1800 loss: 0.021614212542772293\nEpoch 1900 loss: 0.020562106743454933\nEpoch 2000 loss: 0.019880816340446472\nEpoch 2100 loss: 0.01901845820248127\nEpoch 2200 loss: 0.018372364342212677\nEpoch 2300 loss: 0.01828525774180889\nEpoch 2400 loss: 0.018451901152729988\nEpoch 2500 loss: 0.01738181710243225\nEpoch 2600 loss: 0.01698809117078781\nEpoch 2700 loss: 0.01643018051981926\nEpoch 2800 loss: 0.01669265516102314\nEpoch 2900 loss: 0.01664060726761818\nEpoch 3000 loss: 0.01606595516204834\nEpoch 3100 loss: 0.01667209528386593\nEpoch 3200 loss: 0.015133237466216087\nEpoch 3300 loss: 0.014814447611570358\nEpoch 3400 loss: 0.01538220327347517\nEpoch 3500 loss: 0.01484852284193039\nEpoch 3600 loss: 0.01589234732091427\nEpoch 3700 loss: 0.014897373504936695\nEpoch 3800 loss: 0.014240250922739506\nEpoch 3900 loss: 0.015261288732290268\n\n\n\ndef plot_image(model, name=None):\n # Predict the (r, g, b) values\n pred_y = model(X)\n\n # Reshape the predictions to be (3, height, width)\n pred_y = pred_y.transpose(0, 1).reshape(num_channels, height, width)\n\n # plot the image\n plt.imshow(pred_y.permute(1, 2, 0).detach().cpu())\n if name:\n plt.savefig(name)\n\n\nplot_image(m1, \"mlp_dog.png\")\n\nClipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n\n\n\n\n\n\n\n\n\n\n# Create the animation from imgs and save it as a gif\n\nimport imageio\nimageio.mimsave('mlp.gif', imgs, fps=10)\n\nLossy conversion from float32 to uint8. Range [-13.466928482055664, 2.713646650314331]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.18658676743507385, 1.3069090843200684]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.18308542668819427, 1.0001248121261597]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.07874367386102676, 1.0167515277862549]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.09477106481790543, 1.0060935020446777]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.033188510686159134, 1.0109848976135254]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.0989738255739212, 1.0007272958755493]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.04943906515836716, 1.0269501209259033]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.02097826451063156, 1.0289174318313599]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.029821299016475677, 1.0194318294525146]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.016834549605846405, 1.0527536869049072]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.008144930005073547, 1.0191292762756348]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.009020708501338959, 1.0909096002578735]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.017141804099082947, 1.0371521711349487]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.013367637991905212, 1.0438421964645386]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0.0005456805229187012, 1.0179295539855957]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.011109575629234314, 1.0290166139602661]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.020140215754508972, 1.078523874282837]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.0396433025598526, 1.0415352582931519]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.015714898705482483, 1.0283904075622559]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.04321514815092087, 1.0413591861724854]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.04679575562477112, 1.067355990409851]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.003602549433708191, 1.0755447149276733]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.007610529661178589, 1.052262306213379]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.033921219408512115, 1.0815953016281128]. Convert image to uint8 prior to saving to suppress this warning.\n\n\n\n\n# Create a MLP with 5 hidden layers with 256 neurons each and sine activations.\n# Input is (x, y) and output is (r, g, b)\n\nclass MLP_sin(nn.Module):\n def __init__(self):\n super().__init__()\n self.fc1 = nn.Linear(2, 256)\n self.fc2 = nn.Linear(256, 256)\n self.fc3 = nn.Linear(256, 256)\n self.fc4 = nn.Linear(256, 256)\n self.fc5 = nn.Linear(256, 256)\n self.fc6 = nn.Linear(256, 3)\n\n def forward(self, x):\n x = torch.sin(self.fc1(x))\n x = torch.sin(self.fc2(x))\n x = torch.sin(self.fc3(x))\n x = torch.sin(self.fc4(x))\n x = torch.sin(self.fc5(x))\n return self.fc6(x)\n\n\nm2 = MLP_sin()\nm2 = m2.to(device)\nlosses_mlp_sin, imgs = train(X, y, m2, lr=0.001, epochs=4000, bs=1000, print_every=100)\n\nEpoch 0 loss: 0.40150442719459534\nEpoch 100 loss: 0.03298206627368927\nEpoch 200 loss: 0.033279214054346085\nEpoch 300 loss: 0.03175220638513565\nEpoch 400 loss: 0.03205806389451027\nEpoch 500 loss: 0.03196191042661667\nEpoch 600 loss: 0.02972976118326187\nEpoch 700 loss: 0.029925711452960968\nEpoch 800 loss: 0.02968132309615612\nEpoch 900 loss: 0.028653116896748543\nEpoch 1000 loss: 0.02474542148411274\nEpoch 1100 loss: 0.020879685878753662\nEpoch 1200 loss: 0.019819265231490135\nEpoch 1300 loss: 0.016965048387646675\nEpoch 1400 loss: 0.013934656977653503\nEpoch 1500 loss: 0.011689499020576477\nEpoch 1600 loss: 0.010081701911985874\nEpoch 1700 loss: 0.007140354719012976\nEpoch 1800 loss: 0.006480662152171135\nEpoch 1900 loss: 0.005266484338790178\nEpoch 2000 loss: 0.004757172428071499\nEpoch 2100 loss: 0.003453798359259963\nEpoch 2200 loss: 0.0032651633955538273\nEpoch 2300 loss: 0.0028410402592271566\nEpoch 2400 loss: 0.0026403532829135656\nEpoch 2500 loss: 0.0019292739452794194\nEpoch 2600 loss: 0.0021367412991821766\nEpoch 2700 loss: 0.0020427301060408354\nEpoch 2800 loss: 0.0017756932647898793\nEpoch 2900 loss: 0.0016549285501241684\nEpoch 3000 loss: 0.0016728530172258615\nEpoch 3100 loss: 0.001471961266361177\nEpoch 3200 loss: 0.0014844941906630993\nEpoch 3300 loss: 0.0014798615593463182\nEpoch 3400 loss: 0.0012664658715948462\nEpoch 3500 loss: 0.0012708695139735937\nEpoch 3600 loss: 0.0012460555881261826\nEpoch 3700 loss: 0.0012855605455115438\nEpoch 3800 loss: 0.001190435141324997\nEpoch 3900 loss: 0.0011714434949681163\n\n\n\nplot_image(m2, \"mlp_sin_dog.png\")\n\nClipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n\n\n\n\n\n\n\n\n\n\nimageio.mimsave('mlp_sin.gif', imgs, fps=10)\n\nLossy conversion from float32 to uint8. Range [-0.1441832184791565, 0.3080734610557556]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.056166477501392365, 0.9270500540733337]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.04645712673664093, 0.9617018103599548]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08092432469129562, 0.9469475746154785]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.13254448771476746, 1.0228846073150635]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.18537408113479614, 1.0271779298782349]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.15940740704536438, 1.069307804107666]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.1629665046930313, 1.0901581048965454]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.17787247896194458, 1.164113163948059]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.23600360751152039, 1.1689845323562622]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.1829279065132141, 1.1432479619979858]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.12739746272563934, 1.1281737089157104]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11645704507827759, 1.1141674518585205]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11797109246253967, 1.1277530193328857]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09862736612558365, 1.0859858989715576]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.1146015003323555, 1.099491834640503]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09405502676963806, 1.1023061275482178]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.132747620344162, 1.0877472162246704]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11511929333209991, 1.0887328386306763]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.11015606671571732, 1.0807398557662964]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09713895618915558, 1.087331771850586]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.0733504444360733, 1.0549205541610718]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.07674040645360947, 1.0766404867172241]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.07997756451368332, 1.0550076961517334]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.09363748133182526, 1.056591510772705]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08970168232917786, 1.0528484582901]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08736599236726761, 1.04934561252594]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08859498053789139, 1.0708154439926147]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08006224036216736, 1.0856648683547974]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.08170387893915176, 1.071043610572815]. Convert image to uint8 prior to saving to suppress this warning.\nLossy conversion from float32 to uint8. Range [-0.06969650834798813, 1.0583616495132446]. Convert image to uint8 prior to saving to suppress this warning.\n\n\n\n\n# Audio\n!wget https://www.vincentsitzmann.com/siren/img/audio/gt_bach.wav\n\n--2023-04-28 14:24:10-- https://www.vincentsitzmann.com/siren/img/audio/gt_bach.wav\nResolving www.vincentsitzmann.com (www.vincentsitzmann.com)... 185.199.111.153, 185.199.108.153, 185.199.110.153, ...\nConnecting to www.vincentsitzmann.com (www.vincentsitzmann.com)|185.199.111.153|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 1232886 (1.2M) [audio/wav]\nSaving to: ‘gt_bach.wav.3’\n\ngt_bach.wav.3 100%[===================>] 1.17M --.-KB/s in 0.06s \n\n2023-04-28 14:24:10 (19.7 MB/s) - ‘gt_bach.wav.3’ saved [1232886/1232886]\n\n\n\n\n# CLear CUDA cache\ntorch.cuda.empty_cache()\n\n\nfrom IPython.display import Audio\nAudio('gt_bach.wav')\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n# Read the audio file\nimport torchaudio\naudio, sr = torchaudio.load('gt_bach.wav')\n\n\nsr\n\n44100\n\n\n\naudio.shape\naudio = audio[0]\naudio = audio.to(device)\n\n\n# use last 2 seconds of audio\naudio = audio[-2 * sr:]\nX = torch.arange(0, len(audio)).unsqueeze(1).float().to(device)\n\n# Rescale X between -10 and 10\nX = X / X.max() * 20 - 10\nX.min(), X.max()\n\n(tensor(-10., device='cuda:0'), tensor(10., device='cuda:0'))\n\n\n\nX.shape, audio.shape, X\n\n(torch.Size([88200, 1]),\n torch.Size([88200]),\n tensor([[-10.0000],\n [ -9.9998],\n [ -9.9995],\n ...,\n [ 9.9995],\n [ 9.9998],\n [ 10.0000]], device='cuda:0'))\n\n\n\nAudio(audio.cpu(), rate=sr)\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nclass SinActivation(torch.nn.Module):\n def __init__(self):\n super(SinActivation, self).__init__()\n return\n def forward(self, x):\n return torch.sin(x)\n \n\nclass SinActivation30(torch.nn.Module):\n def __init__(self):\n super(SinActivation30, self).__init__()\n return\n def forward(self, x):\n return torch.sin(30*x)\n\n\nimport torch.nn as nn\n\ndef create_mlp(n, m, f):\n \"\"\"\n n: number of hidden layers\n m: number of neurons in each hidden layer\n f: activation function\n ---\n Weighing initialization: \n uniform distribution between -30/input_dim and 30/input_dim for first layer\n -sqrt(6/input_dim) and sqrt(6/input_dim) for the rest\n\n Weight init is done in the forward pass\n \"\"\"\n\n layers = []\n layer1 = nn.Linear(1, m)\n torch.nn.init.uniform_(layer1.weight, a=-1/1, b=1/1)\n #torch.nn.init.uniform_(layer1.bias, a=-1/1, b=1/1)\n layers.append(layer1)\n layers.append(SinActivation30())\n for i in range(n):\n layer_i = nn.Linear(m, m)\n # Uniform distribution between -sqrt(6/input_dim) and sqrt(6/input_dim)\n torch.nn.init.uniform_(layer_i.weight, a=-np.sqrt(6/m), b=np.sqrt(6/m))\n torch.nn.init.uniform_(layer_i.bias, a=-np.sqrt(6/m), b=np.sqrt(6/m))\n layers.append(layer_i)\n layers.append(f)\n layers.append(nn.Linear(m, 1))\n\n return nn.Sequential(*layers)\n\n\nmlp_audio_sin_5_256 = create_mlp(5, 256, SinActivation()).to(device)\n#mlp_audio_sin_8_512 = create_mlp(8, 512, SinActivation()).to(device)\n#mlp_audio_sin_3_128 = create_mlp(3, 128, SinActivation()).to(device)\n\n\nmlp_audio_sin_5_128\n\nNameError: name 'mlp_audio_sin_5_128' is not defined\n\n\n\ndef train_audio(X, y, model, lr=0.01, epochs=1000, bs=1000, print_every=100):\n losses = []\n optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n criterion = nn.MSELoss()\n for epoch in range(epochs):\n num_rows = X.shape[0]\n idx = torch.randperm(num_rows)[:bs]\n batch_X = X[idx]\n batch_y = y[idx]\n pred_y = model(batch_X)\n\n # Compute the loss\n loss = criterion(pred_y, batch_y)\n\n # Zero gradients, perform a backward pass, and update the weights\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()\n losses.append(loss.item())\n\n # Print loss every print_every epochs\n if epoch % print_every == 0:\n print(f\"Epoch {epoch} loss: {loss.item()}\")\n\n return losses\n\n\n#losses_mlp_sin_3_128 = train_audio(X, audio, mlp_audio_sin_3_128, lr=0.0001,\n# epochs=5000, bs=len(X)//2, print_every=100)\n\nlosses_mlp_sin_5_256 = train_audio(X, audio, mlp_audio_sin_5_256, lr=0.0001,\n epochs=5000, bs=len(X)//2, print_every=100)\n\nEpoch 0 loss: 0.210729718208313\n\n\nOutOfMemoryError: CUDA out of memory. Tried to allocate 7.25 GiB (GPU 0; 79.18 GiB total capacity; 63.06 GiB already allocated; 7.88 MiB free; 74.24 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n\n\n\nX\n\ntensor([[-1.7320],\n [-1.7320],\n [-1.7319],\n ...,\n [ 1.7319],\n [ 1.7320],\n [ 1.7320]], device='cuda:0')\n\n\n\nimport time\n\n\na = time.time()\nlosses_mlp_sin_8_512 = train_audio(X, audio, mlp_audio_sin_8_512, \n lr=0.0001, epochs=10, bs=len(X), print_every=1)\nb = time.time()\nprint(b-a)\n\nOutOfMemoryError: CUDA out of memory. Tried to allocate 28.98 GiB (GPU 0; 79.18 GiB total capacity; 33.40 GiB already allocated; 14.51 GiB free; 59.74 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n\n\n\nmlp_audio_sin_8_512 = torch.compile(mlp_audio_sin_8_512)\n\n\na = time.time()\nlosses_mlp_sin_8_512 = train_audio(X, audio, mlp_audio_sin_8_512, \n lr=0.0001, epochs=10, bs=len(X), print_every=1)\nb = time.time()\nprint(b-a)\n\nNameError: name 'time' is not defined\n\n\n\n# Plot the reconstruction\nwith torch.no_grad():\n #pred_y_5_256 = mlp_audio_sin_5_256(X)\n #pred_y_8_512 = mlp_audio_sin_8_512(X)\n pred_y_3_128 = mlp_audio_sin_3_128(X)\n plt.plot(audio.cpu().numpy(), label=\"Ground truth\")\n #plt.plot(pred_y_5_256.cpu().numpy(), label=\"MLP 5 layers 256 neurons\")\n plt.plot(pred_y_3_128.cpu().numpy(), label=\"MLP 8 layers 512 neurons\")\n plt.legend()\n\n\n\n\n\n\n\n\n\nimport pandas as pd\n\ndf = pd.DataFrame({\"GT audio\": audio.cpu().numpy(), \n \"MLP 5 layers 256 neurons\": pred_y_5_256.cpu().numpy().flatten(), \n \"MLP 8 layers 512 neurons\": pred_y_8_512.cpu().numpy().flatten()})\n\n\ndf.describe()\n\n\n\n\n\n\n\n\nGT audio\nMLP 5 layers 256 neurons\nMLP 8 layers 512 neurons\n\n\n\n\ncount\n88200.000000\n88200.000000\n88200.000000\n\n\nmean\n0.000127\n-0.013929\n-0.010819\n\n\nstd\n0.208728\n0.025773\n0.156109\n\n\nmin\n-0.868308\n-0.083747\n-0.710084\n\n\n25%\n-0.130095\n-0.030821\n-0.116540\n\n\n50%\n-0.002093\n-0.011080\n-0.010339\n\n\n75%\n0.130701\n0.002974\n0.094733\n\n\nmax\n1.000000\n0.051832\n0.658187\n\n\n\n\n\n\n\n\naudio.shape, pred_y_8_512.shape\n\n(torch.Size([88200]), torch.Size([88200, 1]))\n\n\n\n# Play the reconstruction\nAudio(pred_y_8_512.cpu().T, rate=sr)\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n\nTODO\n\nShow the gradient of the reconstructed image for different activation functions" }, { "objectID": "posts/2020-06-26-gp-understand.html", "href": "posts/2020-06-26-gp-understand.html", "title": "Understanding Kernels in Gaussian Processes Regression", "section": "", - "text": "Disclaimer\nThis blog post is forked from GPSS 2019 Lab 1. This is produced only for educational purposes. All credit goes to the GPSS organisers.\n\n# Support for maths\nimport numpy as np\n# Plotting tools\nfrom matplotlib import pyplot as plt\n# we use the following for plotting figures in jupyter\n%matplotlib inline\n\nimport warnings\nwarnings.filterwarnings('ignore')\n\n# GPy: Gaussian processes library\nimport GPy\nfrom IPython.display import display\n\n\n\nCovariance functions, aka kernels\nWe will define a covariance function, from hereon referred to as a kernel, using GPy. The most commonly used kernel in machine learning is the Gaussian-form radial basis function (RBF) kernel. It is also commonly referred to as the exponentiated quadratic or squared exponential kernel – all are equivalent.\nThe definition of the (1-dimensional) RBF kernel has a Gaussian-form, defined as:\n\\[\n \\kappa_\\mathrm{rbf}(x,x') = \\sigma^2\\exp\\left(-\\frac{(x-x')^2}{2\\mathscr{l}^2}\\right)\n\\]\nIt has two parameters, described as the variance, \\(\\sigma^2\\) and the lengthscale \\(\\mathscr{l}\\).\nIn GPy, we define our kernels using the input dimension as the first argument, in the simplest case input_dim=1 for 1-dimensional regression. We can also explicitly define the parameters, but for now we will use the default values:\n\n# Create a 1-D RBF kernel with default parameters\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n# Preview the kernel's parameters\nk\n\n\n\n\n\n\n\nrbf.\nvalue\nconstraints\npriors\n\n\nvariance\n4.0\n+ve\n\n\n\nlengthscale\n0.5\n+ve\n\n\n\n\n\n\n\n\nfig, ax = plt.subplots()\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nls = [0.0005, 0.05, 0.25, 0.5, 1., 2., 4.]\n\nX = np.linspace(0.,1.,500)# 500 points evenly spaced over [0,1]\nX = X[:,None]\nmu = np.zeros((500))\n\ndef update(iteration):\n ax.cla()\n k = GPy.kern.RBF(1)\n k.lengthscale = ls[iteration]\n # Calculate the new covariance function at k(x,0)\n C = k.K(X,X)\n Z = np.random.multivariate_normal(mu,C,40)\n for i in range(40):\n ax.plot(X[:],Z[i,:],color='k',alpha=0.2)\n ax.set_title(\"$\\kappa_{rbf}(x,x')$\\nLength scale = %s\" %k.lengthscale[0]);\n ax.set_ylim((-4, 4))\n\n\n\nnum_iterations = len(ls)\nanim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations-1, 1), interval=500)\nplt.close()\n\nrc('animation', html='jshtml')\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect\n \n \n\n\n\n\n\n\n\n\nIn the animation above, as you increase the length scale, the learnt functions keep getting smoother.\n\nfig, ax = plt.subplots()\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nvar = [0.0005, 0.05, 0.25, 0.5, 1., 2., 4., 9.]\n\nX = np.linspace(0.,1.,500)# 500 points evenly spaced over [0,1]\nX = X[:,None]\nmu = np.zeros((500))\n\ndef update(iteration):\n ax.cla()\n k = GPy.kern.RBF(1)\n k.variance = var[iteration]\n # Calculate the new covariance function at k(x,0)\n C = k.K(X,X)\n Z = np.random.multivariate_normal(mu,C,40)\n for i in range(40):\n ax.plot(X[:],Z[i,:],color='k',alpha=0.2)\n ax.set_title(\"$\\kappa_{rbf}(x,x')$\\nVariance = %s\" %k.variance[0]);\n ax.set_ylim((-4, 4))\n\n\n\nnum_iterations = len(ls)\nanim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations-1, 1), interval=500)\nplt.close()\n\nrc('animation', html='jshtml')\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect\n \n \n\n\n\n\n\n\n\n\nIn the animation above, as you increase the variance, the scale of values increases.\n\nX1 = np.array([1, 2, 3]).reshape(-1, 1)\n\ny1 = np.array([0, 1, 0]).reshape(-1, 1)\ny2 = np.array([0, -1, 0]).reshape(-1, 1)\ny3 = np.array([0, 10, 0]).reshape(-1, 1)\ny4 = np.array([0, 0.3, 0]).reshape(-1, 1)\n\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y1, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 0.262031485550043 | +ve | \n lengthscale | 0.24277532672486218 | +ve | \n\n\n\n\n\n\n\n\n\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y2, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 0.262031485550043 | +ve | \n lengthscale | 0.24277532672486218 | +ve | \n\n\n\n\n\n\n\n\n\n\n\nIn the above two examples, the y values are: 0, 1, 0 and 0, -1, 0. This shows smoothness. Thus, length scale can be big (0.24)\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y3, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 16.918792970578004 | +ve | \n lengthscale | 0.07805339389352635 | +ve | \n\n\n\n\n\n\n\n\n\n\n\nIn the above example, the y values are: 0, 10, 0. The data set is not smooth. Thus, length scale learnt uis very small (0.24). Noise variance of RBF kernel also increased to accomodate the 10.\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y4, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 5.90821963086592e-06 | +ve | \n lengthscale | 2.163452641925496 | +ve | \n\n\n\n\n\n\n\n\n\n\n\nIn the above examples, the y values are: 0, 0.3, 0. The data set is the smoothest amongst the four. Thus, length scale learnt is large (2.1). Noise variance of RBF kernel is also small." + "text": "Disclaimer\nThis blog post is forked from GPSS 2019 Lab 1. This is produced only for educational purposes. All credit goes to the GPSS organisers.\n\n# Support for maths\nimport numpy as np\n# Plotting tools\nfrom matplotlib import pyplot as plt\n# we use the following for plotting figures in jupyter\n%matplotlib inline\n\nimport warnings\nwarnings.filterwarnings('ignore')\n\n# GPy: Gaussian processes library\nimport GPy\nfrom IPython.display import display\n\n\n\nCovariance functions, aka kernels\nWe will define a covariance function, from hereon referred to as a kernel, using GPy. The most commonly used kernel in machine learning is the Gaussian-form radial basis function (RBF) kernel. It is also commonly referred to as the exponentiated quadratic or squared exponential kernel – all are equivalent.\nThe definition of the (1-dimensional) RBF kernel has a Gaussian-form, defined as:\n\\[\n \\kappa_\\mathrm{rbf}(x,x') = \\sigma^2\\exp\\left(-\\frac{(x-x')^2}{2\\mathscr{l}^2}\\right)\n\\]\nIt has two parameters, described as the variance, \\(\\sigma^2\\) and the lengthscale \\(\\mathscr{l}\\).\nIn GPy, we define our kernels using the input dimension as the first argument, in the simplest case input_dim=1 for 1-dimensional regression. We can also explicitly define the parameters, but for now we will use the default values:\n\n# Create a 1-D RBF kernel with default parameters\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n# Preview the kernel's parameters\nk\n\n\n\n\n\n\nrbf.\nvalue\nconstraints\npriors\n\n\nvariance\n4.0\n+ve\n\n\n\nlengthscale\n0.5\n+ve\n\n\n\n\n\n\n\nfig, ax = plt.subplots()\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nls = [0.0005, 0.05, 0.25, 0.5, 1., 2., 4.]\n\nX = np.linspace(0.,1.,500)# 500 points evenly spaced over [0,1]\nX = X[:,None]\nmu = np.zeros((500))\n\ndef update(iteration):\n ax.cla()\n k = GPy.kern.RBF(1)\n k.lengthscale = ls[iteration]\n # Calculate the new covariance function at k(x,0)\n C = k.K(X,X)\n Z = np.random.multivariate_normal(mu,C,40)\n for i in range(40):\n ax.plot(X[:],Z[i,:],color='k',alpha=0.2)\n ax.set_title(\"$\\kappa_{rbf}(x,x')$\\nLength scale = %s\" %k.lengthscale[0]);\n ax.set_ylim((-4, 4))\n\n\n\nnum_iterations = len(ls)\nanim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations-1, 1), interval=500)\nplt.close()\n\nrc('animation', html='jshtml')\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect\n \n \n\n\n\n\n\n\n\n\nIn the animation above, as you increase the length scale, the learnt functions keep getting smoother.\n\nfig, ax = plt.subplots()\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nvar = [0.0005, 0.05, 0.25, 0.5, 1., 2., 4., 9.]\n\nX = np.linspace(0.,1.,500)# 500 points evenly spaced over [0,1]\nX = X[:,None]\nmu = np.zeros((500))\n\ndef update(iteration):\n ax.cla()\n k = GPy.kern.RBF(1)\n k.variance = var[iteration]\n # Calculate the new covariance function at k(x,0)\n C = k.K(X,X)\n Z = np.random.multivariate_normal(mu,C,40)\n for i in range(40):\n ax.plot(X[:],Z[i,:],color='k',alpha=0.2)\n ax.set_title(\"$\\kappa_{rbf}(x,x')$\\nVariance = %s\" %k.variance[0]);\n ax.set_ylim((-4, 4))\n\n\n\nnum_iterations = len(ls)\nanim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations-1, 1), interval=500)\nplt.close()\n\nrc('animation', html='jshtml')\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect\n \n \n\n\n\n\n\n\n\n\nIn the animation above, as you increase the variance, the scale of values increases.\n\nX1 = np.array([1, 2, 3]).reshape(-1, 1)\n\ny1 = np.array([0, 1, 0]).reshape(-1, 1)\ny2 = np.array([0, -1, 0]).reshape(-1, 1)\ny3 = np.array([0, 10, 0]).reshape(-1, 1)\ny4 = np.array([0, 0.3, 0]).reshape(-1, 1)\n\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y1, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 0.262031485550043 | +ve | \n lengthscale | 0.24277532672486218 | +ve | \n\n\n\n\n\n\n\n\n\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y2, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 0.262031485550043 | +ve | \n lengthscale | 0.24277532672486218 | +ve | \n\n\n\n\n\n\n\n\n\n\n\nIn the above two examples, the y values are: 0, 1, 0 and 0, -1, 0. This shows smoothness. Thus, length scale can be big (0.24)\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y3, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 16.918792970578004 | +ve | \n lengthscale | 0.07805339389352635 | +ve | \n\n\n\n\n\n\n\n\n\n\n\nIn the above example, the y values are: 0, 10, 0. The data set is not smooth. Thus, length scale learnt uis very small (0.24). Noise variance of RBF kernel also increased to accomodate the 10.\n\nk = GPy.kern.RBF(lengthscale=0.5, input_dim=1, variance=4)\n\nm = GPy.models.GPRegression(X1, y4, k)\n#m.Gaussian_noise = 0.0\nm.optimize()\nprint(k)\nm.plot();\n\n rbf. | value | constraints | priors\n variance | 5.90821963086592e-06 | +ve | \n lengthscale | 2.163452641925496 | +ve | \n\n\n\n\n\n\n\n\n\n\n\nIn the above examples, the y values are: 0, 0.3, 0. The data set is the smoothest amongst the four. Thus, length scale learnt is large (2.1). Noise variance of RBF kernel is also small." }, { "objectID": "posts/2021-06-18-audio-filters.html", "href": "posts/2021-06-18-audio-filters.html", "title": "Audio Filtering on the command line and Python", "section": "", - "text": "In this post I will look into some filters for audio processing in ffmpeg, sox, and Python. I have recorded a small 6 second audio clip where for the first couple of seconds I was not speaking, but background noise is present.\nI had recorded the audio on my Apple device and it was default recorded in .m4a format. I convert it to the wav format. I use ffmpeg for the same. In addition, I am using two flags: -v quiet to reduce the amount of information printed on the console. Second, I am using -y to overwrite an existing file with the same name.\n\n!ffmpeg -i Test.m4a Test.wav -v quiet -y\n\n\nfrom IPython.display import Audio\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n\nAudio(\"Test.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!ffmpeg -i Test.wav -lavfi showspectrumpic=s=720x540:color='magma' ../images/input-spectogram.png -y -v quiet\n\n\nAs can be seen in the above image, I am speaking somewhere close to 3.70 seconds onwards. However, the audio is pretty noisy before this even though I am not speaking. This is due to the background noise coming in from the fans and the air conditioning system.\n\n!sox Test.wav -n spectrogram -o ../images/sox-sg.png\n\n\n\n!sox Test.wav -n rate 32k spectrogram -o ../images/sox-sg-trimmed.png \n\n\nI’ll now get some attributes of the post that are required for processing, such as the recording rate. ## Getting attributes of the recorded file\n\n!ffmpeg -i Test.wav\n\nffmpeg version 4.4 Copyright (c) 2000-2021 the FFmpeg developers\n built with Apple clang version 12.0.5 (clang-1205.0.22.9)\n configuration: --prefix=/usr/local/Cellar/ffmpeg/4.4_2 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-avresample --enable-videotoolbox\n libavutil 56. 70.100 / 56. 70.100\n libavcodec 58.134.100 / 58.134.100\n libavformat 58. 76.100 / 58. 76.100\n libavdevice 58. 13.100 / 58. 13.100\n libavfilter 7.110.100 / 7.110.100\n libavresample 4. 0. 0 / 4. 0. 0\n libswscale 5. 9.100 / 5. 9.100\n libswresample 3. 9.100 / 3. 9.100\n libpostproc 55. 9.100 / 55. 9.100\nGuessed Channel Layout for Input Stream #0.0 : mono\nInput #0, wav, from 'Test.wav':\n Metadata:\n title : Test\n encoder : Lavf58.76.100\n Duration: 00:00:06.63, bitrate: 768 kb/s\n Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s\nAt least one output file must be specified\n\n\nAs can be seen from the cell above, the recording rate is 48 kHz. We will need this when we do some processing in Python.\nBuilding a noise profile from first 3 second\n\n!ffmpeg -i Test.wav -ss 0 -to 3.5 -c copy Noise-Test.wav -v quiet -y\n\n\nAudio('Noise-Test.wav')\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Noise-Test.wav -n rate 32k spectrogram -o ../images/sox-noise.png \n\n\n\n!sox Noise-Test.wav -n noiseprof noise.prof\n\n\n!sox Noise-Test.wav Noise-Test-cleaned.wav noisered noise.prof 0.21\n\n\nAudio(\"Noise-Test-cleaned.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test.wav Test-cleaned-05.wav noisered noise.prof 0.05\n\n\n!sox Test.wav Test-cleaned-18.wav noisered noise.prof 0.18\n!sox Test.wav Test-cleaned-21.wav noisered noise.prof 0.21\n\n\nAudio(\"Test-cleaned-05.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-18.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-21.wav\")\n\n\n!sox Test-cleaned-21.wav -n rate 32k spectrogram -o ../images/sox-cleaned-21.png \n\n\n\n!sox Test-cleaned-05.wav -n rate 32k spectrogram -o ../images/sox-cleaned-05.png \n\n\n\nAudio(\"Test-audacity.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test-audacity.wav -n rate 32k spectrogram -o ../images/sg-audacity.png \n\n\n\n!ffmpeg -i Test.wav -filter:a \"highpass=f=300\" high-passed.wav -y -v quiet\n\n\n\nAudio(\"high-passed.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox high-passed.wav -n rate 32k spectrogram -o ../images/highpass.png \n\n\n\nAudio(\"test-imovie.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox test-imovie.wav -n remix 1 rate 32k spectrogram -o ../images/imovie.png \n\n\nimport mediapy\n\norig = mediapy.read_image('../images/sox-sg-trimmed.png')\naudacity = mediapy.read_image('../images/sg-audacity.png')\nsox_21 = mediapy.read_image('../images/sox-cleaned-21.png')\nsox_05 = mediapy.read_image('../images/sox-cleaned-05.png')\nhigh_pass_300 = mediapy.read_image('../images/highpass.png')\nimovie = mediapy.read_image('../images/imovie.png')\n\n\n\n\nmediapy.show_images({'Original':orig, \n 'Audacity':audacity,\n 'Sox:0.21':sox_21,\n 'Sox:0.05':sox_05,\n 'HPF:300': high_pass_300,\n 'imovie':imovie},\n cmap='magma', columns=4, height=200 )\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nOriginal\n\n\n\n\n\n\n\n\n\nAudacity\n\n\n\n\n\n\n\n\n\nSox:0.21\n\n\n\n\n\n\n\n\n\nSox:0.05\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHPF:300\n\n\n\n\n\n\n\n\n\nimovie\n\n\n\n\n\n\n\n\n\n\n\n\n\n!sox test-audacity.wav output.dat\n\n\nimport pandas as pd\ndf = pd.read_csv(\"output.dat\", skiprows=2, index_col=0, names=['values'],delim_whitespace=True)\ndf = df.astype('float64')\n\n\ndf.plot()" + "text": "In this post I will look into some filters for audio processing in ffmpeg, sox, and Python. I have recorded a small 6 second audio clip where for the first couple of seconds I was not speaking, but background noise is present.\nI had recorded the audio on my Apple device and it was default recorded in .m4a format. I convert it to the wav format. I use ffmpeg for the same. In addition, I am using two flags: -v quiet to reduce the amount of information printed on the console. Second, I am using -y to overwrite an existing file with the same name.\n\n!ffmpeg -i Test.m4a Test.wav -v quiet -y\n\n\nfrom IPython.display import Audio\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n\nAudio(\"Test.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!ffmpeg -i Test.wav -lavfi showspectrumpic=s=720x540:color='magma' ../images/input-spectogram.png -y -v quiet\n\n\nAs can be seen in the above image, I am speaking somewhere close to 3.70 seconds onwards. However, the audio is pretty noisy before this even though I am not speaking. This is due to the background noise coming in from the fans and the air conditioning system.\n\n!sox Test.wav -n spectrogram -o ../images/sox-sg.png\n\n\n\n!sox Test.wav -n rate 32k spectrogram -o ../images/sox-sg-trimmed.png \n\n\nI’ll now get some attributes of the post that are required for processing, such as the recording rate. ## Getting attributes of the recorded file\n\n!ffmpeg -i Test.wav\n\nffmpeg version 4.4 Copyright (c) 2000-2021 the FFmpeg developers\n built with Apple clang version 12.0.5 (clang-1205.0.22.9)\n configuration: --prefix=/usr/local/Cellar/ffmpeg/4.4_2 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-avresample --enable-videotoolbox\n libavutil 56. 70.100 / 56. 70.100\n libavcodec 58.134.100 / 58.134.100\n libavformat 58. 76.100 / 58. 76.100\n libavdevice 58. 13.100 / 58. 13.100\n libavfilter 7.110.100 / 7.110.100\n libavresample 4. 0. 0 / 4. 0. 0\n libswscale 5. 9.100 / 5. 9.100\n libswresample 3. 9.100 / 3. 9.100\n libpostproc 55. 9.100 / 55. 9.100\nGuessed Channel Layout for Input Stream #0.0 : mono\nInput #0, wav, from 'Test.wav':\n Metadata:\n title : Test\n encoder : Lavf58.76.100\n Duration: 00:00:06.63, bitrate: 768 kb/s\n Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s\nAt least one output file must be specified\n\n\nAs can be seen from the cell above, the recording rate is 48 kHz. We will need this when we do some processing in Python.\nBuilding a noise profile from first 3 second\n\n!ffmpeg -i Test.wav -ss 0 -to 3.5 -c copy Noise-Test.wav -v quiet -y\n\n\nAudio('Noise-Test.wav')\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Noise-Test.wav -n rate 32k spectrogram -o ../images/sox-noise.png \n\n\n\n!sox Noise-Test.wav -n noiseprof noise.prof\n\n\n!sox Noise-Test.wav Noise-Test-cleaned.wav noisered noise.prof 0.21\n\n\nAudio(\"Noise-Test-cleaned.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test.wav Test-cleaned-05.wav noisered noise.prof 0.05\n\n\n!sox Test.wav Test-cleaned-18.wav noisered noise.prof 0.18\n!sox Test.wav Test-cleaned-21.wav noisered noise.prof 0.21\n\n\nAudio(\"Test-cleaned-05.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-18.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-21.wav\")\n\n\n!sox Test-cleaned-21.wav -n rate 32k spectrogram -o ../images/sox-cleaned-21.png \n\n\n\n!sox Test-cleaned-05.wav -n rate 32k spectrogram -o ../images/sox-cleaned-05.png \n\n\n\nAudio(\"Test-audacity.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test-audacity.wav -n rate 32k spectrogram -o ../images/sg-audacity.png \n\n\n\n!ffmpeg -i Test.wav -filter:a \"highpass=f=300\" high-passed.wav -y -v quiet\n\n\n\nAudio(\"high-passed.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox high-passed.wav -n rate 32k spectrogram -o ../images/highpass.png \n\n\n\nAudio(\"test-imovie.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox test-imovie.wav -n remix 1 rate 32k spectrogram -o ../images/imovie.png \n\n\nimport mediapy\n\norig = mediapy.read_image('../images/sox-sg-trimmed.png')\naudacity = mediapy.read_image('../images/sg-audacity.png')\nsox_21 = mediapy.read_image('../images/sox-cleaned-21.png')\nsox_05 = mediapy.read_image('../images/sox-cleaned-05.png')\nhigh_pass_300 = mediapy.read_image('../images/highpass.png')\nimovie = mediapy.read_image('../images/imovie.png')\n\n\n\n\nmediapy.show_images({'Original':orig, \n 'Audacity':audacity,\n 'Sox:0.21':sox_21,\n 'Sox:0.05':sox_05,\n 'HPF:300': high_pass_300,\n 'imovie':imovie},\n cmap='magma', columns=4, height=200 )\n\n\n\n\n\n\n\n\n\n\n\n\n\nOriginal\n\n\n\n\n\n\n\n\n\nAudacity\n\n\n\n\n\n\n\n\n\nSox:0.21\n\n\n\n\n\n\n\n\n\nSox:0.05\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHPF:300\n\n\n\n\n\n\n\n\n\nimovie\n\n\n\n\n\n\n\n\n\n\n\n\n!sox test-audacity.wav output.dat\n\n\nimport pandas as pd\ndf = pd.read_csv(\"output.dat\", skiprows=2, index_col=0, names=['values'],delim_whitespace=True)\ndf = df.astype('float64')\n\n\ndf.plot()" }, { "objectID": "posts/2021-06-18-audio-filters.html#introduction", "href": "posts/2021-06-18-audio-filters.html#introduction", "title": "Audio Filtering on the command line and Python", "section": "", - "text": "In this post I will look into some filters for audio processing in ffmpeg, sox, and Python. I have recorded a small 6 second audio clip where for the first couple of seconds I was not speaking, but background noise is present.\nI had recorded the audio on my Apple device and it was default recorded in .m4a format. I convert it to the wav format. I use ffmpeg for the same. In addition, I am using two flags: -v quiet to reduce the amount of information printed on the console. Second, I am using -y to overwrite an existing file with the same name.\n\n!ffmpeg -i Test.m4a Test.wav -v quiet -y\n\n\nfrom IPython.display import Audio\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n\nAudio(\"Test.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!ffmpeg -i Test.wav -lavfi showspectrumpic=s=720x540:color='magma' ../images/input-spectogram.png -y -v quiet\n\n\nAs can be seen in the above image, I am speaking somewhere close to 3.70 seconds onwards. However, the audio is pretty noisy before this even though I am not speaking. This is due to the background noise coming in from the fans and the air conditioning system.\n\n!sox Test.wav -n spectrogram -o ../images/sox-sg.png\n\n\n\n!sox Test.wav -n rate 32k spectrogram -o ../images/sox-sg-trimmed.png \n\n\nI’ll now get some attributes of the post that are required for processing, such as the recording rate. ## Getting attributes of the recorded file\n\n!ffmpeg -i Test.wav\n\nffmpeg version 4.4 Copyright (c) 2000-2021 the FFmpeg developers\n built with Apple clang version 12.0.5 (clang-1205.0.22.9)\n configuration: --prefix=/usr/local/Cellar/ffmpeg/4.4_2 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-avresample --enable-videotoolbox\n libavutil 56. 70.100 / 56. 70.100\n libavcodec 58.134.100 / 58.134.100\n libavformat 58. 76.100 / 58. 76.100\n libavdevice 58. 13.100 / 58. 13.100\n libavfilter 7.110.100 / 7.110.100\n libavresample 4. 0. 0 / 4. 0. 0\n libswscale 5. 9.100 / 5. 9.100\n libswresample 3. 9.100 / 3. 9.100\n libpostproc 55. 9.100 / 55. 9.100\nGuessed Channel Layout for Input Stream #0.0 : mono\nInput #0, wav, from 'Test.wav':\n Metadata:\n title : Test\n encoder : Lavf58.76.100\n Duration: 00:00:06.63, bitrate: 768 kb/s\n Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s\nAt least one output file must be specified\n\n\nAs can be seen from the cell above, the recording rate is 48 kHz. We will need this when we do some processing in Python.\nBuilding a noise profile from first 3 second\n\n!ffmpeg -i Test.wav -ss 0 -to 3.5 -c copy Noise-Test.wav -v quiet -y\n\n\nAudio('Noise-Test.wav')\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Noise-Test.wav -n rate 32k spectrogram -o ../images/sox-noise.png \n\n\n\n!sox Noise-Test.wav -n noiseprof noise.prof\n\n\n!sox Noise-Test.wav Noise-Test-cleaned.wav noisered noise.prof 0.21\n\n\nAudio(\"Noise-Test-cleaned.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test.wav Test-cleaned-05.wav noisered noise.prof 0.05\n\n\n!sox Test.wav Test-cleaned-18.wav noisered noise.prof 0.18\n!sox Test.wav Test-cleaned-21.wav noisered noise.prof 0.21\n\n\nAudio(\"Test-cleaned-05.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-18.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-21.wav\")\n\n\n!sox Test-cleaned-21.wav -n rate 32k spectrogram -o ../images/sox-cleaned-21.png \n\n\n\n!sox Test-cleaned-05.wav -n rate 32k spectrogram -o ../images/sox-cleaned-05.png \n\n\n\nAudio(\"Test-audacity.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test-audacity.wav -n rate 32k spectrogram -o ../images/sg-audacity.png \n\n\n\n!ffmpeg -i Test.wav -filter:a \"highpass=f=300\" high-passed.wav -y -v quiet\n\n\n\nAudio(\"high-passed.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox high-passed.wav -n rate 32k spectrogram -o ../images/highpass.png \n\n\n\nAudio(\"test-imovie.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox test-imovie.wav -n remix 1 rate 32k spectrogram -o ../images/imovie.png \n\n\nimport mediapy\n\norig = mediapy.read_image('../images/sox-sg-trimmed.png')\naudacity = mediapy.read_image('../images/sg-audacity.png')\nsox_21 = mediapy.read_image('../images/sox-cleaned-21.png')\nsox_05 = mediapy.read_image('../images/sox-cleaned-05.png')\nhigh_pass_300 = mediapy.read_image('../images/highpass.png')\nimovie = mediapy.read_image('../images/imovie.png')\n\n\n\n\nmediapy.show_images({'Original':orig, \n 'Audacity':audacity,\n 'Sox:0.21':sox_21,\n 'Sox:0.05':sox_05,\n 'HPF:300': high_pass_300,\n 'imovie':imovie},\n cmap='magma', columns=4, height=200 )\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nOriginal\n\n\n\n\n\n\n\n\n\nAudacity\n\n\n\n\n\n\n\n\n\nSox:0.21\n\n\n\n\n\n\n\n\n\nSox:0.05\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHPF:300\n\n\n\n\n\n\n\n\n\nimovie\n\n\n\n\n\n\n\n\n\n\n\n\n\n!sox test-audacity.wav output.dat\n\n\nimport pandas as pd\ndf = pd.read_csv(\"output.dat\", skiprows=2, index_col=0, names=['values'],delim_whitespace=True)\ndf = df.astype('float64')\n\n\ndf.plot()" + "text": "In this post I will look into some filters for audio processing in ffmpeg, sox, and Python. I have recorded a small 6 second audio clip where for the first couple of seconds I was not speaking, but background noise is present.\nI had recorded the audio on my Apple device and it was default recorded in .m4a format. I convert it to the wav format. I use ffmpeg for the same. In addition, I am using two flags: -v quiet to reduce the amount of information printed on the console. Second, I am using -y to overwrite an existing file with the same name.\n\n!ffmpeg -i Test.m4a Test.wav -v quiet -y\n\n\nfrom IPython.display import Audio\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n\nAudio(\"Test.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!ffmpeg -i Test.wav -lavfi showspectrumpic=s=720x540:color='magma' ../images/input-spectogram.png -y -v quiet\n\n\nAs can be seen in the above image, I am speaking somewhere close to 3.70 seconds onwards. However, the audio is pretty noisy before this even though I am not speaking. This is due to the background noise coming in from the fans and the air conditioning system.\n\n!sox Test.wav -n spectrogram -o ../images/sox-sg.png\n\n\n\n!sox Test.wav -n rate 32k spectrogram -o ../images/sox-sg-trimmed.png \n\n\nI’ll now get some attributes of the post that are required for processing, such as the recording rate. ## Getting attributes of the recorded file\n\n!ffmpeg -i Test.wav\n\nffmpeg version 4.4 Copyright (c) 2000-2021 the FFmpeg developers\n built with Apple clang version 12.0.5 (clang-1205.0.22.9)\n configuration: --prefix=/usr/local/Cellar/ffmpeg/4.4_2 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-avresample --enable-videotoolbox\n libavutil 56. 70.100 / 56. 70.100\n libavcodec 58.134.100 / 58.134.100\n libavformat 58. 76.100 / 58. 76.100\n libavdevice 58. 13.100 / 58. 13.100\n libavfilter 7.110.100 / 7.110.100\n libavresample 4. 0. 0 / 4. 0. 0\n libswscale 5. 9.100 / 5. 9.100\n libswresample 3. 9.100 / 3. 9.100\n libpostproc 55. 9.100 / 55. 9.100\nGuessed Channel Layout for Input Stream #0.0 : mono\nInput #0, wav, from 'Test.wav':\n Metadata:\n title : Test\n encoder : Lavf58.76.100\n Duration: 00:00:06.63, bitrate: 768 kb/s\n Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s\nAt least one output file must be specified\n\n\nAs can be seen from the cell above, the recording rate is 48 kHz. We will need this when we do some processing in Python.\nBuilding a noise profile from first 3 second\n\n!ffmpeg -i Test.wav -ss 0 -to 3.5 -c copy Noise-Test.wav -v quiet -y\n\n\nAudio('Noise-Test.wav')\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Noise-Test.wav -n rate 32k spectrogram -o ../images/sox-noise.png \n\n\n\n!sox Noise-Test.wav -n noiseprof noise.prof\n\n\n!sox Noise-Test.wav Noise-Test-cleaned.wav noisered noise.prof 0.21\n\n\nAudio(\"Noise-Test-cleaned.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test.wav Test-cleaned-05.wav noisered noise.prof 0.05\n\n\n!sox Test.wav Test-cleaned-18.wav noisered noise.prof 0.18\n!sox Test.wav Test-cleaned-21.wav noisered noise.prof 0.21\n\n\nAudio(\"Test-cleaned-05.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-18.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\nAudio(\"Test-cleaned-21.wav\")\n\n\n!sox Test-cleaned-21.wav -n rate 32k spectrogram -o ../images/sox-cleaned-21.png \n\n\n\n!sox Test-cleaned-05.wav -n rate 32k spectrogram -o ../images/sox-cleaned-05.png \n\n\n\nAudio(\"Test-audacity.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox Test-audacity.wav -n rate 32k spectrogram -o ../images/sg-audacity.png \n\n\n\n!ffmpeg -i Test.wav -filter:a \"highpass=f=300\" high-passed.wav -y -v quiet\n\n\n\nAudio(\"high-passed.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox high-passed.wav -n rate 32k spectrogram -o ../images/highpass.png \n\n\n\nAudio(\"test-imovie.wav\")\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n!sox test-imovie.wav -n remix 1 rate 32k spectrogram -o ../images/imovie.png \n\n\nimport mediapy\n\norig = mediapy.read_image('../images/sox-sg-trimmed.png')\naudacity = mediapy.read_image('../images/sg-audacity.png')\nsox_21 = mediapy.read_image('../images/sox-cleaned-21.png')\nsox_05 = mediapy.read_image('../images/sox-cleaned-05.png')\nhigh_pass_300 = mediapy.read_image('../images/highpass.png')\nimovie = mediapy.read_image('../images/imovie.png')\n\n\n\n\nmediapy.show_images({'Original':orig, \n 'Audacity':audacity,\n 'Sox:0.21':sox_21,\n 'Sox:0.05':sox_05,\n 'HPF:300': high_pass_300,\n 'imovie':imovie},\n cmap='magma', columns=4, height=200 )\n\n\n\n\n\n\n\n\n\n\n\n\n\nOriginal\n\n\n\n\n\n\n\n\n\nAudacity\n\n\n\n\n\n\n\n\n\nSox:0.21\n\n\n\n\n\n\n\n\n\nSox:0.05\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHPF:300\n\n\n\n\n\n\n\n\n\nimovie\n\n\n\n\n\n\n\n\n\n\n\n\n!sox test-audacity.wav output.dat\n\n\nimport pandas as pd\ndf = pd.read_csv(\"output.dat\", skiprows=2, index_col=0, names=['values'],delim_whitespace=True)\ndf = df.astype('float64')\n\n\ndf.plot()" }, { "objectID": "posts/2020-01-14-test-markdown-post.html", @@ -823,7 +830,7 @@ "href": "posts/2013-04-01-download_weather.html", "title": "Downloading weather data", "section": "", - "text": "In this notebook, I’ll write a small illustration on downloading historical weather data using forceast.io. I’ll also illustrate handling timezone issues when using such time series data. I am going to use python-forecastio, which is a Python wrapper around forecast.io service. I’ll be downloading hourly weather data for Austin, Texas.\n\nimport datetime\nimport pandas as pd\nimport forecastio\nimport getpass\n\n\n# Enter your API here\napi_key = getpass.getpass()\n\n········\n\n\n\nlen(api_key)\n\n32\n\n\nAustin’s Latitude and longitude\n\nlat = 30.25\nlng = -97.25\n\nLet us see the forecast for 1 Jan 2015\n\ndate = datetime.datetime(2015,1,1)\n\n\nforecast = forecastio.load_forecast(api_key, lat, lng, time=date, units=\"us\")\n\n\nforecast\n\n<forecastio.models.Forecast at 0x10319ce50>\n\n\n\nhourly = forecast.hourly()\n\n\nhourly.data\n\n[<forecastio.models.ForecastioDataPoint at 0x1068643d0>,\n <forecastio.models.ForecastioDataPoint at 0x106864bd0>,\n <forecastio.models.ForecastioDataPoint at 0x106864ad0>,\n <forecastio.models.ForecastioDataPoint at 0x106864cd0>,\n <forecastio.models.ForecastioDataPoint at 0x106864fd0>,\n <forecastio.models.ForecastioDataPoint at 0x106864d10>,\n <forecastio.models.ForecastioDataPoint at 0x100734e10>,\n <forecastio.models.ForecastioDataPoint at 0x1061e3450>,\n <forecastio.models.ForecastioDataPoint at 0x1061e3350>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3250>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3110>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3150>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3190>,\n <forecastio.models.ForecastioDataPoint at 0x1068b31d0>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3210>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3fd0>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3dd0>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3e10>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3e50>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3f50>,\n <forecastio.models.ForecastioDataPoint at 0x1068c84d0>,\n <forecastio.models.ForecastioDataPoint at 0x1068c8390>,\n <forecastio.models.ForecastioDataPoint at 0x1068c8510>,\n <forecastio.models.ForecastioDataPoint at 0x1068c8550>]\n\n\nExtracting data for a single hour.\n\nhourly.data[0].d\n\n{u'apparentTemperature': 32.57,\n u'dewPoint': 33.39,\n u'humidity': 0.79,\n u'icon': u'clear-night',\n u'precipIntensity': 0,\n u'precipProbability': 0,\n u'pressure': 1032.61,\n u'summary': u'Clear',\n u'temperature': 39.46,\n u'time': 1420005600,\n u'visibility': 10,\n u'windBearing': 21,\n u'windSpeed': 10.95}\n\n\nLet us say that we want to use the temperature and humidity only.\n\nattributes = [\"temperature\", \"humidity\"]\n\n\ntimes = []\ndata = {}\nfor attr in attributes:\n data[attr] = []\n\nNow, let us download hourly data for 30 days staring January 1 this year.\n\nstart = datetime.datetime(2015, 1, 1)\nfor offset in range(1, 60):\n forecast = forecastio.load_forecast(api_key, lat, lng, time=start+datetime.timedelta(offset), units=\"us\")\n h = forecast.hourly()\n d = h.data\n for p in d:\n times.append(p.time)\n for attr in attributes:\n data[attr].append(p.d[attr])\n\nNow, let us create a Pandas data frame for this time series data.\n\ndf = pd.DataFrame(data, index=times)\n\n\ndf.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\n\n\n2015-01-01 11:30:00\n0.73\n38.74\n\n\n2015-01-01 12:30:00\n0.74\n38.56\n\n\n2015-01-01 13:30:00\n0.75\n38.56\n\n\n2015-01-01 14:30:00\n0.79\n37.97\n\n\n2015-01-01 15:30:00\n0.80\n37.78\n\n\n\n\n\n\n\n\nNow, we need to fix the timezone.\n\ndf = df.tz_localize(\"Asia/Kolkata\").tz_convert(\"US/Central\")\n\n\ndf.head()\n\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n\n\n\n\n\n\n\n\nI’ll now export this file to a CSV to use it for following demonstrations on aggregations on time series.\n\ndf.to_csv(\"weather.csv\")\n\nA quick validation of our downloaded data.\n\n%matplotlib inline\nimport matplotlib.pyplot as plt\nplt.style.use('ggplot')\n\n\ndf.plot(subplots=True);" + "text": "In this notebook, I’ll write a small illustration on downloading historical weather data using forceast.io. I’ll also illustrate handling timezone issues when using such time series data. I am going to use python-forecastio, which is a Python wrapper around forecast.io service. I’ll be downloading hourly weather data for Austin, Texas.\n\nimport datetime\nimport pandas as pd\nimport forecastio\nimport getpass\n\n\n# Enter your API here\napi_key = getpass.getpass()\n\n········\n\n\n\nlen(api_key)\n\n32\n\n\nAustin’s Latitude and longitude\n\nlat = 30.25\nlng = -97.25\n\nLet us see the forecast for 1 Jan 2015\n\ndate = datetime.datetime(2015,1,1)\n\n\nforecast = forecastio.load_forecast(api_key, lat, lng, time=date, units=\"us\")\n\n\nforecast\n\n<forecastio.models.Forecast at 0x10319ce50>\n\n\n\nhourly = forecast.hourly()\n\n\nhourly.data\n\n[<forecastio.models.ForecastioDataPoint at 0x1068643d0>,\n <forecastio.models.ForecastioDataPoint at 0x106864bd0>,\n <forecastio.models.ForecastioDataPoint at 0x106864ad0>,\n <forecastio.models.ForecastioDataPoint at 0x106864cd0>,\n <forecastio.models.ForecastioDataPoint at 0x106864fd0>,\n <forecastio.models.ForecastioDataPoint at 0x106864d10>,\n <forecastio.models.ForecastioDataPoint at 0x100734e10>,\n <forecastio.models.ForecastioDataPoint at 0x1061e3450>,\n <forecastio.models.ForecastioDataPoint at 0x1061e3350>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3250>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3110>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3150>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3190>,\n <forecastio.models.ForecastioDataPoint at 0x1068b31d0>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3210>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3fd0>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3dd0>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3e10>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3e50>,\n <forecastio.models.ForecastioDataPoint at 0x1068b3f50>,\n <forecastio.models.ForecastioDataPoint at 0x1068c84d0>,\n <forecastio.models.ForecastioDataPoint at 0x1068c8390>,\n <forecastio.models.ForecastioDataPoint at 0x1068c8510>,\n <forecastio.models.ForecastioDataPoint at 0x1068c8550>]\n\n\nExtracting data for a single hour.\n\nhourly.data[0].d\n\n{u'apparentTemperature': 32.57,\n u'dewPoint': 33.39,\n u'humidity': 0.79,\n u'icon': u'clear-night',\n u'precipIntensity': 0,\n u'precipProbability': 0,\n u'pressure': 1032.61,\n u'summary': u'Clear',\n u'temperature': 39.46,\n u'time': 1420005600,\n u'visibility': 10,\n u'windBearing': 21,\n u'windSpeed': 10.95}\n\n\nLet us say that we want to use the temperature and humidity only.\n\nattributes = [\"temperature\", \"humidity\"]\n\n\ntimes = []\ndata = {}\nfor attr in attributes:\n data[attr] = []\n\nNow, let us download hourly data for 30 days staring January 1 this year.\n\nstart = datetime.datetime(2015, 1, 1)\nfor offset in range(1, 60):\n forecast = forecastio.load_forecast(api_key, lat, lng, time=start+datetime.timedelta(offset), units=\"us\")\n h = forecast.hourly()\n d = h.data\n for p in d:\n times.append(p.time)\n for attr in attributes:\n data[attr].append(p.d[attr])\n\nNow, let us create a Pandas data frame for this time series data.\n\ndf = pd.DataFrame(data, index=times)\n\n\ndf.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\n\n\n2015-01-01 11:30:00\n0.73\n38.74\n\n\n2015-01-01 12:30:00\n0.74\n38.56\n\n\n2015-01-01 13:30:00\n0.75\n38.56\n\n\n2015-01-01 14:30:00\n0.79\n37.97\n\n\n2015-01-01 15:30:00\n0.80\n37.78\n\n\n\n\n\n\n\nNow, we need to fix the timezone.\n\ndf = df.tz_localize(\"Asia/Kolkata\").tz_convert(\"US/Central\")\n\n\ndf.head()\n\n\n\n\n\n\n\nhumidity\ntemperature\n\n\n\n\n2015-01-01 00:00:00-06:00\n0.73\n38.74\n\n\n2015-01-01 01:00:00-06:00\n0.74\n38.56\n\n\n2015-01-01 02:00:00-06:00\n0.75\n38.56\n\n\n2015-01-01 03:00:00-06:00\n0.79\n37.97\n\n\n2015-01-01 04:00:00-06:00\n0.80\n37.78\n\n\n\n\n\n\n\nI’ll now export this file to a CSV to use it for following demonstrations on aggregations on time series.\n\ndf.to_csv(\"weather.csv\")\n\nA quick validation of our downloaded data.\n\n%matplotlib inline\nimport matplotlib.pyplot as plt\nplt.style.use('ggplot')\n\n\ndf.plot(subplots=True);" }, { "objectID": "posts/2018-08-18-placement-preparation-2018-1-hashmap.html", @@ -935,7 +942,7 @@ "href": "posts/2022-02-07-coin-toss.html", "title": "Coin Toss (MLE, MAP, Fully Bayesian) in TF Probability", "section": "", - "text": "Goals\nWe will be studying the problem of coin tosses. I will not go into derivations but mostly deal with automatic gradient computation in TF Probability.\nWe have the following goals in this tutorial.\n\nGoal 1: Maximum Likelihood Estimate (MLE)\nGiven a set of N observations, estimate the probability of H (denoted as \\(\\theta = p(H)\\))\n\n\nGoal 2: Maximum A-Posteriori (MAP)\nGiven a set of N observations and some prior knowledge on the distribution of \\(\\theta\\), estimate the best point estimate of \\(\\theta\\) once we have observed the dataset.\n\n\nGoal 3: Fully Bayesian\nGiven a set of N observations and some prior knowledge on the distribution of \\(\\theta\\), estimate the distribution of \\(\\theta\\) once we have observed the dataset.\nWhile I mention all the references below, I acknowledge Felix and his excellent repo and video playlist (Playlist 1, Playlist 2). They inspired me to create this post.\n\n\n\nBasic Imports\n\nfrom silence_tensorflow import silence_tensorflow\n\nsilence_tensorflow()\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport tensorflow as tf\nimport functools\nimport seaborn as sns\nimport tensorflow_probability as tfp\nimport pandas as pd\n\ntfd = tfp.distributions\ntfl = tfp.layers\ntfb = tfp.bijectors\n\nsns.reset_defaults()\nsns.set_context(context=\"talk\", font_scale=1)\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\nCreating a dataset\nLet us create a dataset. We will assume the coin toss to be given as per the Bernoulli distribution. We will assume that \\(\\theta = p(H) = 0.75\\) and generate 10 samples. We will fix the random seeds for reproducibility.\nWe will be encoding Heads as 1 and Tails as 0.\n\nnp.random.seed(0)\ntf.random.set_seed(0)\n\n\ndistribution = tfd.Bernoulli(probs=0.75)\n\ndataset_10 = distribution.sample(10)\nprint(dataset_10.numpy())\nmle_estimate_10 = tf.reduce_mean(tf.cast(dataset_10, tf.float32))\ntf.print(mle_estimate_10)\n\n[0 0 0 1 1 1 1 1 0 1]\n0.6\n\n\n\n\n\nMLE\n\nObtaining MLE analytically\nFrom the above 10 samples, we obtain 6 Heads (1) and 4 Tails. As per the principal of MLE, the best estimate for \\(\\theta = p(H) = \\dfrac{n_h}{n_h+n_t} = 0.6\\)\nWe may also notice that the value of 0.6 is far from the 0.75 value we had initially set. This is possible as our dataset is small.\nWe will now verify if we get the same result using TFP. But, first, we can create a graphical model for our problem.\n\n\nGraphical model\n\nimport daft\n\npgm = daft.PGM([4, 3], origin=[0, 0])\npgm.add_node(daft.Node(\"theta\", r\"$\\theta$\", 1, 2.5, aspect=1.8))\n\npgm.add_node(daft.Node(\"obs\", r\"$obs_i$\", 1, 1, aspect=1.2, observed=True))\n\npgm.add_edge(\"theta\", \"obs\")\npgm.add_plate([0, 0.5, 2, 1.0], label=r\"$N$\", shift=-0.1)\npgm.render()\n\n\n\n\n\n\n\n\n\n\nObtaining MLE analytically for different dataset sizes\n\ndataset_large = distribution.sample(100000)\n\nmle_estimate = {}\nfor dataset_size in [10, 50, 100, 500, 1000, 10000, 100000]:\n mle_estimate[dataset_size] = tf.reduce_mean(\n tf.cast(dataset_large[:dataset_size], tf.float32)\n )\ntf.print(mle_estimate)\n\n{10: 0.9,\n 50: 0.76,\n 100: 0.71,\n 500: 0.746,\n 1000: 0.749,\n 10000: 0.749,\n 100000: 0.75144}\n\n\nAs we can see above, when we use larger dataset sizes, our estimate matches the value we set (0.75).\n\n\nUsing TFP for MLE\n\nModel setup\n\ntheta = tf.Variable(0.1)\nfit = tfd.Bernoulli(probs=theta)\n\nfit.log_prob(dataset_10)\n\n<tf.Tensor: shape=(10,), dtype=float32, numpy=\narray([-0.10536052, -0.10536052, -0.10536052, -2.3025851 , -2.3025851 ,\n -2.3025851 , -2.3025851 , -2.3025851 , -0.10536052, -2.3025851 ],\n dtype=float32)>\n\n\n\n\nDefining loss\nWe now define the negative log likelihood as our loss function and work towards minimizing it.\n\ndataset = dataset_10\n\n\ndef loss():\n return -tf.reduce_sum(fit.log_prob(dataset))\n\n\n\nTracing variables over training\n\ntrace_fn = lambda traceable_quantities: {\n \"loss\": traceable_quantities.loss,\n \"theta\": theta,\n}\n\nnum_steps = 150\n\n\n\nMinimizing the loss function\n\ntrace = tfp.math.minimize(\n loss_fn=loss,\n num_steps=num_steps,\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n trace_fn=trace_fn,\n)\n\n\ntheta\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.5981374>\n\n\n\nfig, ax = plt.subplots(nrows=2, sharex=True, figsize=(6, 4))\nax[0].plot(range(num_steps), trace[\"loss\"])\nax[1].plot(range(num_steps), trace[\"theta\"])\nsns.despine()\nax[1].set_xlabel(\"Iterations\")\nax[0].set_ylabel(\"Loss\")\nax[1].set_ylabel(r\"$\\theta$\")\nfig.tight_layout()\n\n\n\n\n\n\n\n\nFrom the above calculations, we can see that we have obtained the same estimate of ~0.6 using TFP.\n\n\nAlternate way to minimize\nPreviously, we used the tf.math.minimize, but we can also use tf.GradientTape() for the same purpose.\n\n@tf.function\ndef loss_and_grads(fit):\n with tf.GradientTape() as tape:\n loss = -tf.reduce_sum(fit.log_prob(dataset))\n return loss, tape.gradient(loss, fit.trainable_variables)\n\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n\n\ntheta = tf.Variable(0.1)\nfit = tfd.Bernoulli(probs=theta)\n\n\nfor i in range(num_steps):\n loss, grads = loss_and_grads(fit)\n optimizer.apply_gradients(zip(grads, fit.trainable_variables))\n\n\nfit.trainable_variables\n\n(<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.5981374>,)\n\n\nWe can see that we obtain the same estimate.\n\n\n\n\nMAP\nWe will now be setting a prior over \\(\\theta\\). A general graphical model is shown below.\n\npgm = daft.PGM([4, 4], origin=[0, 0])\npgm.add_node(daft.Node(\"alpha\", r\"$\\alpha$\", 0.5, 3.5, aspect=1.8))\npgm.add_node(daft.Node(\"beta\", r\"$\\beta$\", 1.5, 3.5, aspect=1.8))\n\n\npgm.add_node(daft.Node(\"theta\", r\"$\\theta$\", 1, 2.5, aspect=2))\n# pgm.add_node(daft.Node(\"theta\", r\"$\\theta\\sim Beta (\\alpha, \\beta)$\", 1, 2.5, aspect=4))\n\npgm.add_node(daft.Node(\"obs\", r\"$obs_i$\", 1, 1, aspect=1.2, observed=True))\n\npgm.add_edge(\"theta\", \"obs\")\npgm.add_edge(\"alpha\", \"theta\")\npgm.add_edge(\"beta\", \"theta\")\n\n\npgm.add_plate([0, 0.5, 2, 1.0], label=r\"$N$\", shift=-0.1)\npgm.render()\n\n\n\n\n\n\n\n\n\nMAP with uniform prior\nFirst, we see the estimate for \\(\\theta\\) if we use the uniform prior. We should obtain the MLE answer.\n\ndef coin_toss_uniform_model():\n theta = yield tfp.distributions.Uniform(low=0.0, high=1.0, name=\"Theta\")\n coin = yield tfp.distributions.Bernoulli(probs=tf.ones(100) * theta, name=\"Coin\")\n\n\ncoin_toss_uniform_model\n\n<function __main__.coin_toss_uniform_model()>\n\n\n\nmodel_joint_uniform = tfp.distributions.JointDistributionCoroutineAutoBatched(\n lambda: coin_toss_uniform_model(), name=\"Original\"\n)\n\n\nmodel_joint_uniform\n\n<tfp.distributions.JointDistributionCoroutineAutoBatched 'Original' batch_shape=[] event_shape=StructTuple(\n Theta=[],\n Coin=[100]\n) dtype=StructTuple(\n Theta=float32,\n Coin=int32\n)>\n\n\n\ndef uniform_model(dataset):\n num_datapoints = len(dataset)\n theta = yield tfp.distributions.Uniform(low=0.0, high=1.0, name=\"Theta\")\n\n coin = yield tfp.distributions.Bernoulli(\n probs=tf.ones(num_datapoints) * theta, name=\"Coin\"\n )\n\n\nconcrete_uniform_model = functools.partial(uniform_model, dataset=dataset_10)\n\nmodel = tfd.JointDistributionCoroutineAutoBatched(concrete_uniform_model)\n\n\nmodel.sample()\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.5930122>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 0, 1, 0, 1, 1, 1, 0, 1, 1], dtype=int32)>\n)\n\n\n\nth = tf.Variable(0.4)\n\ntarget_log_prob_fn = lambda th: model.log_prob((th, dataset_10))\n\n\nx_s = tf.linspace(0.0, 1.0, 1000)\ny_s = -target_log_prob_fn(x_s)\nplt.plot(x_s, y_s)\nplt.xlabel(r\"$\\theta$\")\nplt.ylabel(\"- Joint Log Prob \\n(Unnormalized)\")\n\nsns.despine()\n\n\n\n\n\n\n\n\n\ntrace = tfp.math.minimize(\n lambda: -target_log_prob_fn(th),\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n # trace_fn=trace_fn,\n num_steps=200,\n)\n\n\nth\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.59999406>\n\n\n\nmle_estimate_10\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.6>\n\n\nWe see above that our MAP estimate is fairly close to the MLE when we used the uniform prior.\n\n\nMAP with Beta prior\nWe will now use a much more informative prior – the Beta prior. We will be setting \\(\\alpha=40\\) and \\(\\beta=10\\) indicating that we have a prior belief that Tails is much more likely than Heads. This is a bad assumption and in the limited data regime will lead to poor estimates.\n\ndef beta_prior_model(dataset, alpha, beta):\n num_datapoints = len(dataset)\n theta = yield tfp.distributions.Beta(\n concentration0=alpha, concentration1=beta, name=\"Theta\"\n )\n\n coin = yield tfp.distributions.Bernoulli(\n probs=tf.ones(num_datapoints) * theta, name=\"Coin\"\n )\n\n\nconcrete_beta_prior_model_40_10 = functools.partial(\n beta_prior_model, dataset=dataset_10, alpha=40, beta=10\n)\n\n\nmodel_2_40_10 = tfd.JointDistributionCoroutineAutoBatched(\n concrete_beta_prior_model_40_10\n)\n\n\nmodel_2_40_10.sample()\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.16982338>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>\n)\n\n\n\nmodel_2_40_10.prob(Theta=0.1, Coin=[0, 0, 0, 0, 0, 0, 0, 0, 1, 1])\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.005809709>\n\n\n\nth = tf.Variable(0.2)\n\ntarget_log_prob_fn = lambda th: model_2_40_10.log_prob(Theta=th, Coin=dataset_10)\n\n\nx_s = tf.linspace(0.0, 1.0, 1000)\ny_s = -target_log_prob_fn(x_s)\nplt.plot(x_s, y_s)\nplt.xlabel(r\"$\\theta$\")\nplt.ylabel(\"- Joint Log Prob \\n(Unnormalized)\")\n\nsns.despine()\n\n\n\n\n\n\n\n\n\ntrace = tfp.math.minimize(\n lambda: -target_log_prob_fn(th),\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n # trace_fn=trace_fn,\n num_steps=200,\n)\n\n\nth\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.25861916>\n\n\nWe now see that our MAP estimate for \\(\\theta\\) is 0.25, which is very far from the MLE. Choosing a better prior would have led to better estimates. Or, if we had more data, the likelihood would have dominated over the prior resulting in better estimates.\n\nconcrete_beta_prior_model_1_1 = functools.partial(\n beta_prior_model, dataset=dataset_10, alpha=1, beta=1\n)\n\nmodel_2_1_1 = tfd.JointDistributionCoroutineAutoBatched(concrete_beta_prior_model_1_1)\n\nth = tf.Variable(0.2)\n\ntarget_log_prob_fn = lambda th: model_2_1_1.log_prob(Theta=th, Coin=dataset_10)\n\ntrace = tfp.math.minimize(\n lambda: -target_log_prob_fn(th),\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n # trace_fn=trace_fn,\n num_steps=200,\n)\n\nth\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.6000196>\n\n\nOur estimate for \\(\\theta\\) is more reasonable now.\n\n\n\nFully Bayesian\nWe now need to define a model \\(q(\\theta)\\) to act as the surrogate for our posterior \\(p(\\theta|D)\\). Let us use a Beta distribution.\n\nq_alpha = tf.Variable(1.0)\nq_beta = tf.Variable(1.0)\n\n\nsurrogate_posterior = tfd.Beta(concentration0=q_alpha, concentration1=q_beta, name=\"q\")\n\n\nsurrogate_posterior.sample()\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.7745516>\n\n\n\nlosses = tfp.vi.fit_surrogate_posterior(\n target_log_prob_fn,\n surrogate_posterior=surrogate_posterior,\n optimizer=tf.optimizers.Adam(learning_rate=0.005),\n num_steps=400,\n)\n\n\nplt.plot(losses)\nplt.xlabel(\"Iterations\")\nplt.ylabel(\"Loss\")\nsns.despine()\n\n\n\n\n\n\n\n\n\nq_alpha, q_beta\n\n(<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.1893775>,\n <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.5093094>)\n\n\n\nsns.kdeplot(surrogate_posterior.sample(500).numpy(), bw_adjust=2)\nsns.despine()\nplt.xlabel(r\"$\\theta$\")\n\nText(0.5, 0, '$\\\\theta$')\n\n\n\n\n\n\n\n\n\n\nGenerating samples on coin tosses conditioning on theta\nFirst, let us look at the syntax and then generate 1000 samples.\n\nmodel_2_1_1.sample(Theta=0.1)\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.1>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int32)>\n)\n\n\n\nmodel_2_1_1.sample(Theta=0.9)\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.9>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)>\n)\n\n\nWe can clearly see that conditioning on r\\(\\theta\\) changes the number of heads.\n\n\nFun check: What if we fix the dataset and sample on theta?\n\nmodel_2_1_1.sample(Coin=[0, 1, 1, 0, 1, 1, 1, 0])\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.34792978>,\n Coin=<tf.Tensor: shape=(8,), dtype=int32, numpy=array([0, 1, 1, 0, 1, 1, 1, 0], dtype=int32)>\n)\n\n\n\nmodel_2_1_1.sample(Coin=[0, 1, 1, 0, 1, 1, 1, 0])\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.74594486>,\n Coin=<tf.Tensor: shape=(8,), dtype=int32, numpy=array([0, 1, 1, 0, 1, 1, 1, 0], dtype=int32)>\n)\n\n\nAs we see above, we can get different \\(\\theta\\). If our dataset was large, this effect would be less pronounced.\n\nc = model_2_1_1.sample(Theta=surrogate_posterior.sample(1000)).Coin\n\n\npd.DataFrame(c)\n\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0\n0\n1\n1\n0\n0\n0\n0\n0\n0\n\n\n1\n0\n1\n1\n1\n1\n1\n1\n1\n1\n1\n\n\n2\n1\n1\n0\n0\n0\n0\n1\n0\n0\n0\n\n\n3\n0\n0\n1\n0\n1\n0\n0\n0\n0\n0\n\n\n4\n1\n1\n1\n0\n0\n1\n1\n1\n1\n1\n\n\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n\n\n995\n0\n0\n0\n0\n1\n1\n0\n0\n1\n0\n\n\n996\n1\n0\n1\n1\n1\n1\n1\n1\n0\n0\n\n\n997\n0\n0\n0\n0\n1\n1\n1\n1\n1\n0\n\n\n998\n1\n1\n1\n1\n1\n0\n1\n1\n1\n0\n\n\n999\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n\n\n\n\n1000 rows × 10 columns\n\n\n\n\n\nsns.histplot(tf.reduce_sum(tf.cast(c, tf.float32), axis=1), bins=11)\nsns.despine()\n\n\n\n\n\n\n\n\nWe can see the count of number of heads in 1000 samples generated from the posterior.\n\n\nReferences (incomplete as of now)\n\nExcellent repo and video playlist (Playlist 1, Playlist 2) by Felix\nProbabilistic PCA tutorial on TFP\nDiscussion on joint log prob on TFP" + "text": "Goals\nWe will be studying the problem of coin tosses. I will not go into derivations but mostly deal with automatic gradient computation in TF Probability.\nWe have the following goals in this tutorial.\n\nGoal 1: Maximum Likelihood Estimate (MLE)\nGiven a set of N observations, estimate the probability of H (denoted as \\(\\theta = p(H)\\))\n\n\nGoal 2: Maximum A-Posteriori (MAP)\nGiven a set of N observations and some prior knowledge on the distribution of \\(\\theta\\), estimate the best point estimate of \\(\\theta\\) once we have observed the dataset.\n\n\nGoal 3: Fully Bayesian\nGiven a set of N observations and some prior knowledge on the distribution of \\(\\theta\\), estimate the distribution of \\(\\theta\\) once we have observed the dataset.\nWhile I mention all the references below, I acknowledge Felix and his excellent repo and video playlist (Playlist 1, Playlist 2). They inspired me to create this post.\n\n\n\nBasic Imports\n\nfrom silence_tensorflow import silence_tensorflow\n\nsilence_tensorflow()\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport tensorflow as tf\nimport functools\nimport seaborn as sns\nimport tensorflow_probability as tfp\nimport pandas as pd\n\ntfd = tfp.distributions\ntfl = tfp.layers\ntfb = tfp.bijectors\n\nsns.reset_defaults()\nsns.set_context(context=\"talk\", font_scale=1)\n%matplotlib inline\n%config InlineBackend.figure_format='retina'\n\n\nCreating a dataset\nLet us create a dataset. We will assume the coin toss to be given as per the Bernoulli distribution. We will assume that \\(\\theta = p(H) = 0.75\\) and generate 10 samples. We will fix the random seeds for reproducibility.\nWe will be encoding Heads as 1 and Tails as 0.\n\nnp.random.seed(0)\ntf.random.set_seed(0)\n\n\ndistribution = tfd.Bernoulli(probs=0.75)\n\ndataset_10 = distribution.sample(10)\nprint(dataset_10.numpy())\nmle_estimate_10 = tf.reduce_mean(tf.cast(dataset_10, tf.float32))\ntf.print(mle_estimate_10)\n\n[0 0 0 1 1 1 1 1 0 1]\n0.6\n\n\n\n\n\nMLE\n\nObtaining MLE analytically\nFrom the above 10 samples, we obtain 6 Heads (1) and 4 Tails. As per the principal of MLE, the best estimate for \\(\\theta = p(H) = \\dfrac{n_h}{n_h+n_t} = 0.6\\)\nWe may also notice that the value of 0.6 is far from the 0.75 value we had initially set. This is possible as our dataset is small.\nWe will now verify if we get the same result using TFP. But, first, we can create a graphical model for our problem.\n\n\nGraphical model\n\nimport daft\n\npgm = daft.PGM([4, 3], origin=[0, 0])\npgm.add_node(daft.Node(\"theta\", r\"$\\theta$\", 1, 2.5, aspect=1.8))\n\npgm.add_node(daft.Node(\"obs\", r\"$obs_i$\", 1, 1, aspect=1.2, observed=True))\n\npgm.add_edge(\"theta\", \"obs\")\npgm.add_plate([0, 0.5, 2, 1.0], label=r\"$N$\", shift=-0.1)\npgm.render()\n\n\n\n\n\n\n\n\n\n\nObtaining MLE analytically for different dataset sizes\n\ndataset_large = distribution.sample(100000)\n\nmle_estimate = {}\nfor dataset_size in [10, 50, 100, 500, 1000, 10000, 100000]:\n mle_estimate[dataset_size] = tf.reduce_mean(\n tf.cast(dataset_large[:dataset_size], tf.float32)\n )\ntf.print(mle_estimate)\n\n{10: 0.9,\n 50: 0.76,\n 100: 0.71,\n 500: 0.746,\n 1000: 0.749,\n 10000: 0.749,\n 100000: 0.75144}\n\n\nAs we can see above, when we use larger dataset sizes, our estimate matches the value we set (0.75).\n\n\nUsing TFP for MLE\n\nModel setup\n\ntheta = tf.Variable(0.1)\nfit = tfd.Bernoulli(probs=theta)\n\nfit.log_prob(dataset_10)\n\n<tf.Tensor: shape=(10,), dtype=float32, numpy=\narray([-0.10536052, -0.10536052, -0.10536052, -2.3025851 , -2.3025851 ,\n -2.3025851 , -2.3025851 , -2.3025851 , -0.10536052, -2.3025851 ],\n dtype=float32)>\n\n\n\n\nDefining loss\nWe now define the negative log likelihood as our loss function and work towards minimizing it.\n\ndataset = dataset_10\n\n\ndef loss():\n return -tf.reduce_sum(fit.log_prob(dataset))\n\n\n\nTracing variables over training\n\ntrace_fn = lambda traceable_quantities: {\n \"loss\": traceable_quantities.loss,\n \"theta\": theta,\n}\n\nnum_steps = 150\n\n\n\nMinimizing the loss function\n\ntrace = tfp.math.minimize(\n loss_fn=loss,\n num_steps=num_steps,\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n trace_fn=trace_fn,\n)\n\n\ntheta\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.5981374>\n\n\n\nfig, ax = plt.subplots(nrows=2, sharex=True, figsize=(6, 4))\nax[0].plot(range(num_steps), trace[\"loss\"])\nax[1].plot(range(num_steps), trace[\"theta\"])\nsns.despine()\nax[1].set_xlabel(\"Iterations\")\nax[0].set_ylabel(\"Loss\")\nax[1].set_ylabel(r\"$\\theta$\")\nfig.tight_layout()\n\n\n\n\n\n\n\n\nFrom the above calculations, we can see that we have obtained the same estimate of ~0.6 using TFP.\n\n\nAlternate way to minimize\nPreviously, we used the tf.math.minimize, but we can also use tf.GradientTape() for the same purpose.\n\n@tf.function\ndef loss_and_grads(fit):\n with tf.GradientTape() as tape:\n loss = -tf.reduce_sum(fit.log_prob(dataset))\n return loss, tape.gradient(loss, fit.trainable_variables)\n\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)\n\n\ntheta = tf.Variable(0.1)\nfit = tfd.Bernoulli(probs=theta)\n\n\nfor i in range(num_steps):\n loss, grads = loss_and_grads(fit)\n optimizer.apply_gradients(zip(grads, fit.trainable_variables))\n\n\nfit.trainable_variables\n\n(<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.5981374>,)\n\n\nWe can see that we obtain the same estimate.\n\n\n\n\nMAP\nWe will now be setting a prior over \\(\\theta\\). A general graphical model is shown below.\n\npgm = daft.PGM([4, 4], origin=[0, 0])\npgm.add_node(daft.Node(\"alpha\", r\"$\\alpha$\", 0.5, 3.5, aspect=1.8))\npgm.add_node(daft.Node(\"beta\", r\"$\\beta$\", 1.5, 3.5, aspect=1.8))\n\n\npgm.add_node(daft.Node(\"theta\", r\"$\\theta$\", 1, 2.5, aspect=2))\n# pgm.add_node(daft.Node(\"theta\", r\"$\\theta\\sim Beta (\\alpha, \\beta)$\", 1, 2.5, aspect=4))\n\npgm.add_node(daft.Node(\"obs\", r\"$obs_i$\", 1, 1, aspect=1.2, observed=True))\n\npgm.add_edge(\"theta\", \"obs\")\npgm.add_edge(\"alpha\", \"theta\")\npgm.add_edge(\"beta\", \"theta\")\n\n\npgm.add_plate([0, 0.5, 2, 1.0], label=r\"$N$\", shift=-0.1)\npgm.render()\n\n\n\n\n\n\n\n\n\nMAP with uniform prior\nFirst, we see the estimate for \\(\\theta\\) if we use the uniform prior. We should obtain the MLE answer.\n\ndef coin_toss_uniform_model():\n theta = yield tfp.distributions.Uniform(low=0.0, high=1.0, name=\"Theta\")\n coin = yield tfp.distributions.Bernoulli(probs=tf.ones(100) * theta, name=\"Coin\")\n\n\ncoin_toss_uniform_model\n\n<function __main__.coin_toss_uniform_model()>\n\n\n\nmodel_joint_uniform = tfp.distributions.JointDistributionCoroutineAutoBatched(\n lambda: coin_toss_uniform_model(), name=\"Original\"\n)\n\n\nmodel_joint_uniform\n\n<tfp.distributions.JointDistributionCoroutineAutoBatched 'Original' batch_shape=[] event_shape=StructTuple(\n Theta=[],\n Coin=[100]\n) dtype=StructTuple(\n Theta=float32,\n Coin=int32\n)>\n\n\n\ndef uniform_model(dataset):\n num_datapoints = len(dataset)\n theta = yield tfp.distributions.Uniform(low=0.0, high=1.0, name=\"Theta\")\n\n coin = yield tfp.distributions.Bernoulli(\n probs=tf.ones(num_datapoints) * theta, name=\"Coin\"\n )\n\n\nconcrete_uniform_model = functools.partial(uniform_model, dataset=dataset_10)\n\nmodel = tfd.JointDistributionCoroutineAutoBatched(concrete_uniform_model)\n\n\nmodel.sample()\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.5930122>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 0, 1, 0, 1, 1, 1, 0, 1, 1], dtype=int32)>\n)\n\n\n\nth = tf.Variable(0.4)\n\ntarget_log_prob_fn = lambda th: model.log_prob((th, dataset_10))\n\n\nx_s = tf.linspace(0.0, 1.0, 1000)\ny_s = -target_log_prob_fn(x_s)\nplt.plot(x_s, y_s)\nplt.xlabel(r\"$\\theta$\")\nplt.ylabel(\"- Joint Log Prob \\n(Unnormalized)\")\n\nsns.despine()\n\n\n\n\n\n\n\n\n\ntrace = tfp.math.minimize(\n lambda: -target_log_prob_fn(th),\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n # trace_fn=trace_fn,\n num_steps=200,\n)\n\n\nth\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.59999406>\n\n\n\nmle_estimate_10\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.6>\n\n\nWe see above that our MAP estimate is fairly close to the MLE when we used the uniform prior.\n\n\nMAP with Beta prior\nWe will now use a much more informative prior – the Beta prior. We will be setting \\(\\alpha=40\\) and \\(\\beta=10\\) indicating that we have a prior belief that Tails is much more likely than Heads. This is a bad assumption and in the limited data regime will lead to poor estimates.\n\ndef beta_prior_model(dataset, alpha, beta):\n num_datapoints = len(dataset)\n theta = yield tfp.distributions.Beta(\n concentration0=alpha, concentration1=beta, name=\"Theta\"\n )\n\n coin = yield tfp.distributions.Bernoulli(\n probs=tf.ones(num_datapoints) * theta, name=\"Coin\"\n )\n\n\nconcrete_beta_prior_model_40_10 = functools.partial(\n beta_prior_model, dataset=dataset_10, alpha=40, beta=10\n)\n\n\nmodel_2_40_10 = tfd.JointDistributionCoroutineAutoBatched(\n concrete_beta_prior_model_40_10\n)\n\n\nmodel_2_40_10.sample()\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.16982338>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>\n)\n\n\n\nmodel_2_40_10.prob(Theta=0.1, Coin=[0, 0, 0, 0, 0, 0, 0, 0, 1, 1])\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.005809709>\n\n\n\nth = tf.Variable(0.2)\n\ntarget_log_prob_fn = lambda th: model_2_40_10.log_prob(Theta=th, Coin=dataset_10)\n\n\nx_s = tf.linspace(0.0, 1.0, 1000)\ny_s = -target_log_prob_fn(x_s)\nplt.plot(x_s, y_s)\nplt.xlabel(r\"$\\theta$\")\nplt.ylabel(\"- Joint Log Prob \\n(Unnormalized)\")\n\nsns.despine()\n\n\n\n\n\n\n\n\n\ntrace = tfp.math.minimize(\n lambda: -target_log_prob_fn(th),\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n # trace_fn=trace_fn,\n num_steps=200,\n)\n\n\nth\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.25861916>\n\n\nWe now see that our MAP estimate for \\(\\theta\\) is 0.25, which is very far from the MLE. Choosing a better prior would have led to better estimates. Or, if we had more data, the likelihood would have dominated over the prior resulting in better estimates.\n\nconcrete_beta_prior_model_1_1 = functools.partial(\n beta_prior_model, dataset=dataset_10, alpha=1, beta=1\n)\n\nmodel_2_1_1 = tfd.JointDistributionCoroutineAutoBatched(concrete_beta_prior_model_1_1)\n\nth = tf.Variable(0.2)\n\ntarget_log_prob_fn = lambda th: model_2_1_1.log_prob(Theta=th, Coin=dataset_10)\n\ntrace = tfp.math.minimize(\n lambda: -target_log_prob_fn(th),\n optimizer=tf.optimizers.Adam(learning_rate=0.01),\n # trace_fn=trace_fn,\n num_steps=200,\n)\n\nth\n\n<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.6000196>\n\n\nOur estimate for \\(\\theta\\) is more reasonable now.\n\n\n\nFully Bayesian\nWe now need to define a model \\(q(\\theta)\\) to act as the surrogate for our posterior \\(p(\\theta|D)\\). Let us use a Beta distribution.\n\nq_alpha = tf.Variable(1.0)\nq_beta = tf.Variable(1.0)\n\n\nsurrogate_posterior = tfd.Beta(concentration0=q_alpha, concentration1=q_beta, name=\"q\")\n\n\nsurrogate_posterior.sample()\n\n<tf.Tensor: shape=(), dtype=float32, numpy=0.7745516>\n\n\n\nlosses = tfp.vi.fit_surrogate_posterior(\n target_log_prob_fn,\n surrogate_posterior=surrogate_posterior,\n optimizer=tf.optimizers.Adam(learning_rate=0.005),\n num_steps=400,\n)\n\n\nplt.plot(losses)\nplt.xlabel(\"Iterations\")\nplt.ylabel(\"Loss\")\nsns.despine()\n\n\n\n\n\n\n\n\n\nq_alpha, q_beta\n\n(<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.1893775>,\n <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.5093094>)\n\n\n\nsns.kdeplot(surrogate_posterior.sample(500).numpy(), bw_adjust=2)\nsns.despine()\nplt.xlabel(r\"$\\theta$\")\n\nText(0.5, 0, '$\\\\theta$')\n\n\n\n\n\n\n\n\n\n\nGenerating samples on coin tosses conditioning on theta\nFirst, let us look at the syntax and then generate 1000 samples.\n\nmodel_2_1_1.sample(Theta=0.1)\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.1>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int32)>\n)\n\n\n\nmodel_2_1_1.sample(Theta=0.9)\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.9>,\n Coin=<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)>\n)\n\n\nWe can clearly see that conditioning on r\\(\\theta\\) changes the number of heads.\n\n\nFun check: What if we fix the dataset and sample on theta?\n\nmodel_2_1_1.sample(Coin=[0, 1, 1, 0, 1, 1, 1, 0])\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.34792978>,\n Coin=<tf.Tensor: shape=(8,), dtype=int32, numpy=array([0, 1, 1, 0, 1, 1, 1, 0], dtype=int32)>\n)\n\n\n\nmodel_2_1_1.sample(Coin=[0, 1, 1, 0, 1, 1, 1, 0])\n\nStructTuple(\n Theta=<tf.Tensor: shape=(), dtype=float32, numpy=0.74594486>,\n Coin=<tf.Tensor: shape=(8,), dtype=int32, numpy=array([0, 1, 1, 0, 1, 1, 1, 0], dtype=int32)>\n)\n\n\nAs we see above, we can get different \\(\\theta\\). If our dataset was large, this effect would be less pronounced.\n\nc = model_2_1_1.sample(Theta=surrogate_posterior.sample(1000)).Coin\n\n\npd.DataFrame(c)\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n\n\n\n\n0\n0\n0\n1\n1\n0\n0\n0\n0\n0\n0\n\n\n1\n0\n1\n1\n1\n1\n1\n1\n1\n1\n1\n\n\n2\n1\n1\n0\n0\n0\n0\n1\n0\n0\n0\n\n\n3\n0\n0\n1\n0\n1\n0\n0\n0\n0\n0\n\n\n4\n1\n1\n1\n0\n0\n1\n1\n1\n1\n1\n\n\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n...\n\n\n995\n0\n0\n0\n0\n1\n1\n0\n0\n1\n0\n\n\n996\n1\n0\n1\n1\n1\n1\n1\n1\n0\n0\n\n\n997\n0\n0\n0\n0\n1\n1\n1\n1\n1\n0\n\n\n998\n1\n1\n1\n1\n1\n0\n1\n1\n1\n0\n\n\n999\n1\n1\n1\n1\n1\n1\n1\n1\n1\n1\n\n\n\n\n1000 rows × 10 columns\n\n\n\n\nsns.histplot(tf.reduce_sum(tf.cast(c, tf.float32), axis=1), bins=11)\nsns.despine()\n\n\n\n\n\n\n\n\nWe can see the count of number of heads in 1000 samples generated from the posterior.\n\n\nReferences (incomplete as of now)\n\nExcellent repo and video playlist (Playlist 1, Playlist 2) by Felix\nProbabilistic PCA tutorial on TFP\nDiscussion on joint log prob on TFP" }, { "objectID": "posts/2014-06-01-em.html", diff --git a/setup/2024/06/12/shortuts-mac/index.html b/setup/2024/06/12/shortuts-mac/index.html new file mode 100644 index 0000000..fd0de99 --- /dev/null +++ b/setup/2024/06/12/shortuts-mac/index.html @@ -0,0 +1,14 @@ + + + Redirect + + + + + diff --git a/sitemap.xml b/sitemap.xml index e1477b9..5436dfd 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,430 +2,434 @@ https://nipunbatra.github.io/blog/posts/2017-08-12-linear-regression-adagrad-vs-gd.html - 2024-05-30T12:37:02.249Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/2017-06-14-widgets-matplotlib.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/2014-05-01-gibbs-sampling.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.064Z https://nipunbatra.github.io/blog/posts/2022-02-04-sampling-normal.html - 2024-05-30T12:37:02.357Z + 2024-06-10T11:37:17.180Z https://nipunbatra.github.io/blog/posts/2017-12-29-neural-collaborative-filtering.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/2013-05-01-aggregation-timeseries.html - 2024-05-30T12:37:02.241Z + 2024-06-10T11:37:17.060Z https://nipunbatra.github.io/blog/posts/2017-12-18-recommend-keras.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/comparing-gp.html - 2024-05-30T12:37:02.653Z + 2024-06-10T11:37:17.472Z https://nipunbatra.github.io/blog/posts/networkx-trees.html - 2024-05-30T12:37:03.309Z + 2024-06-10T11:37:18.120Z https://nipunbatra.github.io/blog/posts/2021-09-03-param-learning-sgd.html - 2024-05-30T12:37:02.345Z + 2024-06-10T11:37:17.168Z https://nipunbatra.github.io/blog/posts/2013-07-01-hmm_continuous.html - 2024-05-30T12:37:02.241Z + 2024-06-10T11:37:17.064Z https://nipunbatra.github.io/blog/posts/2021-08-20-bayesian.html - 2024-05-30T12:37:02.341Z + 2024-06-10T11:37:17.164Z https://nipunbatra.github.io/blog/posts/2018-06-21-aq-india-map.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/2017-04-21-constrained-nmf-cvx.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/stakcing.html - 2024-05-30T12:37:03.197Z + 2024-06-10T11:37:18.008Z https://nipunbatra.github.io/blog/posts/2022-01-29-kl-divergence.html - 2024-05-30T12:37:02.357Z + 2024-06-10T11:37:17.180Z https://nipunbatra.github.io/blog/posts/2018-06-16-active-committee.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/pinn.html - 2024-05-30T12:37:03.237Z + 2024-06-10T11:37:18.048Z https://nipunbatra.github.io/blog/posts/2024-rnn.html - 2024-05-30T12:37:02.409Z + 2024-06-10T11:37:17.228Z https://nipunbatra.github.io/blog/posts/siren-paper.html - 2024-05-30T12:37:03.277Z + 2024-06-10T11:37:18.088Z https://nipunbatra.github.io/blog/posts/2022-02-09-pytorch-learn-normal.html - 2024-05-30T12:37:02.361Z + 2024-06-10T11:37:17.184Z https://nipunbatra.github.io/blog/posts/sympy.html - 2024-05-30T12:37:03.305Z + 2024-06-10T11:37:18.116Z https://nipunbatra.github.io/blog/posts/2022-10-25-mogp.html - 2024-05-30T12:37:02.457Z + 2024-06-10T11:37:17.276Z https://nipunbatra.github.io/blog/posts/2017-06-15-linear-regression-prior.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/2022-01-28-tfp-linear-regression.html - 2024-05-30T12:37:02.353Z + 2024-06-10T11:37:17.176Z https://nipunbatra.github.io/blog/posts/2022-02-14-logistic-regression.html - 2024-05-30T12:37:02.373Z + 2024-06-10T11:37:17.196Z https://nipunbatra.github.io/blog/posts/2021-09-01-hello-julia-language.html - 2024-05-30T12:37:02.345Z + 2024-06-10T11:37:17.168Z https://nipunbatra.github.io/blog/posts/2018-06-26-map-electricity-access.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.076Z https://nipunbatra.github.io/blog/posts/2024-forecast.html - 2024-05-30T12:37:02.405Z + 2024-06-10T11:37:17.228Z https://nipunbatra.github.io/blog/posts/2021-06-19-blur-affinity.html - 2024-05-30T12:37:02.341Z + 2024-06-10T11:37:17.164Z https://nipunbatra.github.io/blog/posts/fsgm.html - 2024-05-30T12:37:02.765Z + 2024-06-10T11:37:17.588Z https://nipunbatra.github.io/blog/posts/2022-02-17-pyro-linreg.html - 2024-05-30T12:37:02.377Z + 2024-06-10T11:37:17.200Z https://nipunbatra.github.io/blog/posts/2021-06-12-setup-mac.html - 2024-05-30T12:37:02.297Z + 2024-06-10T11:37:17.120Z https://nipunbatra.github.io/blog/posts/2021-06-17-python-ssh.html - 2024-05-30T12:37:02.301Z + 2024-06-10T11:37:17.120Z https://nipunbatra.github.io/blog/posts/2022-02-14-gmm.html - 2024-05-30T12:37:02.369Z + 2024-06-10T11:37:17.192Z https://nipunbatra.github.io/blog/posts/2013-09-01-denoising.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.064Z https://nipunbatra.github.io/blog/posts/2022-02-15-draw-graphical-models.html - 2024-05-30T12:37:02.373Z + 2024-06-10T11:37:17.196Z https://nipunbatra.github.io/blog/posts/2017-04-19-nmf-out-matrix.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/2019-08-20-gaussian-processes.html - 2024-05-30T12:37:02.257Z + 2024-06-10T11:37:17.080Z https://nipunbatra.github.io/blog/posts/2021-06-14-setup-ipad.html - 2024-05-30T12:37:02.297Z + 2024-06-10T11:37:17.120Z https://nipunbatra.github.io/blog/posts/2021-05-31-gan.html - 2024-05-30T12:37:02.297Z + 2024-06-10T11:37:17.120Z https://nipunbatra.github.io/blog/posts/2017-08-13-mf-autograd-adagrad.html - 2024-05-30T12:37:02.249Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/svd.html - 2024-05-30T12:37:03.301Z + 2024-06-10T11:37:18.112Z https://nipunbatra.github.io/blog/posts/2022-01-26-tfp-distributions.html - 2024-05-30T12:37:02.349Z + 2024-06-10T11:37:17.172Z https://nipunbatra.github.io/blog/posts/residual-torch.html - 2024-05-30T12:37:03.245Z + 2024-06-10T11:37:18.056Z https://nipunbatra.github.io/blog/posts/2024-sample-distribution.html - 2024-05-30T12:37:02.409Z + 2024-06-10T11:37:17.232Z https://nipunbatra.github.io/blog/posts/2020-03-02-linear-scratch.html - 2024-05-30T12:37:02.261Z + 2024-06-10T11:37:17.080Z https://nipunbatra.github.io/blog/posts/attention-sequence.html - 2024-05-30T12:37:02.401Z + 2024-06-10T11:37:17.224Z https://nipunbatra.github.io/blog/posts/logo.html - 2024-05-30T12:37:02.401Z + 2024-06-10T11:37:17.224Z https://nipunbatra.github.io/blog/posts/2022-02-11-pytorch-learn-normal-map.html - 2024-05-30T12:37:02.365Z + 2024-06-10T11:37:17.188Z https://nipunbatra.github.io/blog/posts/2013-06-01-hmm_simulate.html - 2024-05-30T12:37:02.241Z + 2024-06-10T11:37:17.064Z https://nipunbatra.github.io/blog/posts/2022-02-17-ppca.html - 2024-05-30T12:37:02.373Z + 2024-06-10T11:37:17.196Z - https://nipunbatra.github.io/blog/posts/2020-02-20-bayesian-linear-regression.html - 2024-05-30T12:37:02.257Z + https://nipunbatra.github.io/blog/posts/2024-06-10-shortcuts-mac.html + 2024-06-10T11:37:17.216Z + + + https://nipunbatra.github.io/blog/posts/vscode-tips/index.html + 2024-06-10T11:37:18.144Z https://nipunbatra.github.io/blog/index.html - 2024-05-30T12:37:02.225Z + 2024-06-10T11:37:17.044Z - https://nipunbatra.github.io/blog/posts/vscode-tips/index.html - 2024-05-30T12:37:03.333Z + https://nipunbatra.github.io/blog/posts/2020-02-20-bayesian-linear-regression.html + 2024-06-10T11:37:17.080Z https://nipunbatra.github.io/blog/posts/2022-02-05-simple-dgm.html - 2024-05-30T12:37:02.357Z + 2024-06-10T11:37:17.180Z https://nipunbatra.github.io/blog/posts/2020-03-28-active_learning_with_bayesian_linear_regression.html - 2024-05-30T12:37:02.293Z + 2024-06-10T11:37:17.116Z https://nipunbatra.github.io/blog/posts/towards-transformers.html - 2024-05-30T12:37:02.381Z + 2024-06-10T11:37:17.204Z https://nipunbatra.github.io/blog/posts/autoencoder.html - 2024-05-30T12:37:02.637Z + 2024-06-10T11:37:17.460Z https://nipunbatra.github.io/blog/posts/2020-03-26-gp.html - 2024-05-30T12:37:02.269Z + 2024-06-10T11:37:17.088Z https://nipunbatra.github.io/blog/posts/auto-pytorch.html - 2024-05-30T12:37:03.197Z + 2024-06-10T11:37:18.008Z https://nipunbatra.github.io/blog/posts/2022-02-24-audio-filtering.html - 2024-05-30T12:37:02.381Z + 2024-06-10T11:37:17.204Z https://nipunbatra.github.io/blog/posts/2024-attention.html - 2024-05-30T12:37:02.401Z + 2024-06-10T11:37:17.224Z https://nipunbatra.github.io/blog/posts/2018-01-13-denoising.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/2022-02-20-condition-pyro.html - 2024-05-30T12:37:02.377Z + 2024-06-10T11:37:17.200Z https://nipunbatra.github.io/blog/posts/2020-03-29-param-learning.html - 2024-05-30T12:37:02.293Z + 2024-06-10T11:37:17.116Z https://nipunbatra.github.io/blog/posts/2022-02-11-matrix.html - 2024-05-30T12:37:02.365Z + 2024-06-10T11:37:17.188Z https://nipunbatra.github.io/blog/posts/2017-08-02-fifty-ggplot-python-1.html - 2024-05-30T12:37:02.249Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/object-detection.html - 2024-05-30T12:37:02.405Z + 2024-06-10T11:37:17.228Z https://nipunbatra.github.io/blog/posts/2021-06-16-shortcuts-ipad.html - 2024-05-30T12:37:02.297Z + 2024-06-10T11:37:17.120Z https://nipunbatra.github.io/blog/posts/strassen.html - 2024-05-30T12:37:02.389Z + 2024-06-10T11:37:17.212Z https://nipunbatra.github.io/blog/posts/2023-01-19-conformal-intro.html - 2024-05-30T12:37:02.661Z + 2024-06-10T11:37:17.480Z https://nipunbatra.github.io/blog/posts/siren-paper-impl.html - 2024-05-30T12:37:03.277Z + 2024-06-10T11:37:18.088Z https://nipunbatra.github.io/blog/posts/2020-06-26-gp-understand.html - 2024-05-30T12:37:02.297Z + 2024-06-10T11:37:17.120Z https://nipunbatra.github.io/blog/posts/2021-06-18-audio-filters.html - 2024-05-30T12:37:02.341Z + 2024-06-10T11:37:17.164Z https://nipunbatra.github.io/blog/posts/2020-01-14-test-markdown-post.html - 2024-05-30T12:37:02.257Z + 2024-06-10T11:37:17.080Z https://nipunbatra.github.io/blog/posts/np.html - 2024-05-30T12:37:02.393Z + 2024-06-10T11:37:17.216Z https://nipunbatra.github.io/blog/posts/2014-06-02-latexify.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/2013-04-01-download_weather.html - 2024-05-30T12:37:02.241Z + 2024-06-10T11:37:17.060Z https://nipunbatra.github.io/blog/posts/2018-08-18-placement-preparation-2018-1-hashmap.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.076Z https://nipunbatra.github.io/blog/posts/tensorboard.html - 2024-05-30T12:37:03.305Z + 2024-06-10T11:37:18.116Z https://nipunbatra.github.io/blog/posts/2020-02-28-xor-relu-vector.html - 2024-05-30T12:37:02.257Z + 2024-06-10T11:37:17.080Z https://nipunbatra.github.io/blog/posts/2020-03-08-keras-neural-non-linear.html - 2024-05-30T12:37:02.261Z + 2024-06-10T11:37:17.080Z https://nipunbatra.github.io/blog/posts/torch-likelihoods.html - 2024-05-30T12:37:03.309Z + 2024-06-10T11:37:18.120Z https://nipunbatra.github.io/blog/posts/2018-01-07-cs-phd-lessons.html - 2024-05-30T12:37:02.253Z + 2024-06-10T11:37:17.072Z https://nipunbatra.github.io/blog/posts/bald.html - 2024-05-30T12:37:02.381Z + 2024-06-10T11:37:17.204Z https://nipunbatra.github.io/blog/posts/moe.html - 2024-05-30T12:37:02.397Z + 2024-06-10T11:37:17.220Z https://nipunbatra.github.io/blog/posts/2020-04-16-inverse-transform.html - 2024-05-30T12:37:02.293Z + 2024-06-10T11:37:17.116Z https://nipunbatra.github.io/blog/posts/transcript.html - 2024-05-30T12:37:02.389Z + 2024-06-10T11:37:17.212Z https://nipunbatra.github.io/blog/posts/mvn-nn.html - 2024-05-30T12:37:02.389Z + 2024-06-10T11:37:17.212Z https://nipunbatra.github.io/blog/posts/2022-11-20-binomial-poisson-distribution.html - 2024-05-30T12:37:02.641Z + 2024-06-10T11:37:17.460Z https://nipunbatra.github.io/blog/posts/2022-02-05-lr.html - 2024-05-30T12:37:02.357Z + 2024-06-10T11:37:17.180Z https://nipunbatra.github.io/blog/posts/2022-02-21-coordinate-descent-failure.html - 2024-05-30T12:37:02.377Z + 2024-06-10T11:37:17.200Z https://nipunbatra.github.io/blog/posts/positional-encoding.html - 2024-05-30T12:37:03.245Z + 2024-06-10T11:37:18.056Z https://nipunbatra.github.io/blog/posts/2022-02-07-coin-toss.html - 2024-05-30T12:37:02.361Z + 2024-06-10T11:37:17.184Z https://nipunbatra.github.io/blog/posts/2014-06-01-em.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.064Z https://nipunbatra.github.io/blog/posts/2022-02-12-variational-inference.html - 2024-05-30T12:37:02.369Z + 2024-06-10T11:37:17.192Z https://nipunbatra.github.io/blog/posts/2014-07-01-mcmc_coins.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/sr.html - 2024-05-30T12:37:02.393Z + 2024-06-10T11:37:17.216Z https://nipunbatra.github.io/blog/posts/2017-04-20-parafac-out-tensor.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.068Z https://nipunbatra.github.io/blog/posts/rl.html - 2024-05-30T12:37:02.389Z + 2024-06-10T11:37:17.212Z https://nipunbatra.github.io/blog/posts/2022-02-09-autograd-pytorch-jax.html - 2024-05-30T12:37:02.361Z + 2024-06-10T11:37:17.184Z https://nipunbatra.github.io/blog/posts/welcome/index.html - 2024-05-30T12:37:03.333Z + 2024-06-10T11:37:18.144Z https://nipunbatra.github.io/blog/posts/2014-05-01-dtw.html - 2024-05-30T12:37:02.245Z + 2024-06-10T11:37:17.064Z https://nipunbatra.github.io/blog/posts/2022-10-27-calibration.html - 2024-05-30T12:37:03.245Z + 2024-06-10T11:37:18.056Z https://nipunbatra.github.io/blog/posts/mv-taylor.html - 2024-05-30T12:37:03.193Z + 2024-06-10T11:37:18.004Z https://nipunbatra.github.io/blog/posts/stacking.html - 2024-05-30T12:37:03.301Z + 2024-06-10T11:37:18.112Z
Shortcut