diff --git a/examples/mxnet/gcn/README.md b/examples/mxnet/gcn/README.md
index c389975d5376..e0408edd0446 100644
--- a/examples/mxnet/gcn/README.md
+++ b/examples/mxnet/gcn/README.md
@@ -19,8 +19,6 @@ Codes
-The folder contains three implementations of GCN:
+The folder contains two implementations of GCN:
 - `gcn.py` uses DGL's predefined graph convolution module.
 - `gcn_mp.py` uses user-defined message and reduce functions.
-- `gcn_spmv.py` improves from `gcn_mp.py` by using DGL's builtin functions
-  so SPMV optimization could be applied.
 Modify `train.py` to switch between different implementations.
 
 The provided implementation in `gcn_concat.py` is a bit different from the
@@ -40,8 +38,7 @@ DGLBACKEND=mxnet python3 train.py --dataset cora --gpu 0 --self-loop
 Results (`gcn_concat.py vs. gcn.py`)
 ------------------------------------
 `gcn_concat.py` uses concatenation of hidden units to account for multi-hop
-skip-connections, while `gcn_spmv.py` uses simple additions (the original paper
-omitted this detail). We feel concatenation is superior
+skip-connections. We feel concatenation is superior
 because all neighboring information is presented without additional modeling
 assumptions.
 These results are based on single-run training to minimize the cross-entropy
@@ -56,12 +53,6 @@ factorization. Given the small datasets, we can't draw such conclusions from the
 # Final accuracy 57.70% MLP without GCN
 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "citeseer" --n-epochs 200 --n-layers 0
 
-# Final accuracy 68.20% with 2-layer GCN
-DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "citeseer" --n-epochs 200 --n-layers 1
-
-# Final accuracy 18.40% with 10-layer GCN
-DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "citeseer" --n-epochs 200 --n-layers 9
-
 # Final accuracy 65.70% with 10-layer GCN with skip connection
 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "citeseer" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop
 
@@ -74,12 +65,6 @@ DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "citeseer" -
 # Final accuracy 53.20% MLP without GCN
 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-epochs 200 --n-layers 0
 
-# Final accuracy 81.40% with 2-layer GCN
-DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "cora" --n-epochs 200 --n-layers 1
-
-# Final accuracy 27.60% with 10-layer GCN
-DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "cora" --n-epochs 200 --n-layers 9
-
 # Final accuracy 72.60% with 2-layer GCN with skip connection
 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop
 
@@ -92,12 +77,6 @@ DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-e
 # Final accuracy 70.30% MLP without GCN
 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 0
 
-# Final accuracy 77.40% with 2-layer GCN
-DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "pubmed" --n-epochs 200 --n-layers 1
-
-# Final accuracy 36.20% with 10-layer GCN
-DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_spmv.py --dataset "pubmed" --n-epochs 200 --n-layers 9
-
 # Final accuracy 78.30% with 2-layer GCN with skip connection
 DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop
 
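The README hunk above contrasts concatenation (`gcn_concat.py`) with the simple addition the deleted variant used. A minimal PyTorch sketch of that difference, with tensor shapes invented purely for illustration (this snippet is not part of either file):

    import torch

    # Hypothetical hidden units produced by two consecutive GCN layers.
    h_prev = torch.randn(8, 16)   # output of layer k-1
    h_curr = torch.randn(8, 16)   # output of layer k

    # Additive skip connection: the feature size stays fixed and the
    # per-hop information is mixed together.
    h_add = h_prev + h_curr                      # shape (8, 16)

    # Concatenation skip connection: each hop's features are kept verbatim,
    # at the cost of a wider input to the next layer's weight matrix.
    h_cat = torch.cat([h_prev, h_curr], dim=1)   # shape (8, 32)

Concatenation hands the next layer every hop's information unmixed, which is the "no additional modeling assumptions" argument the README makes.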
diff --git a/examples/mxnet/gcn/gcn_spmv.py b/examples/mxnet/gcn/gcn_spmv.py
deleted file mode 100644
index 1f475e640087..000000000000
--- a/examples/mxnet/gcn/gcn_spmv.py
+++ /dev/null
@@ -1,79 +0,0 @@
-"""GCN using builtin functions that enables SPMV optimization.
-
-References:
-- Semi-Supervised Classification with Graph Convolutional Networks
-- Paper: https://arxiv.org/abs/1609.02907
-- Code: https://github.com/tkipf/gcn
-"""
-import math
-import mxnet as mx
-from mxnet import gluon
-import dgl
-import dgl.function as fn
-
-class GCNLayer(gluon.Block):
-    def __init__(self,
-                 g,
-                 in_feats,
-                 out_feats,
-                 activation,
-                 dropout,
-                 bias=True):
-        super(GCNLayer, self).__init__()
-        self.g = g
-        with self.name_scope():
-            stdv = 1. / math.sqrt(out_feats)
-            self.weight = self.params.get('weight', shape=(in_feats, out_feats),
-                                          init=mx.init.Uniform(stdv))
-            if bias:
-                self.bias = self.params.get('bias', shape=(out_feats,),
-                                            init=mx.init.Uniform(stdv))
-            else:
-                self.bias = None
-        self.activation = activation
-        self.dropout = dropout
-
-    def forward(self, h):
-        if self.dropout:
-            h = mx.nd.Dropout(h, p=self.dropout)
-        h = mx.nd.dot(h, self.weight.data(h.context))
-        # normalization by square root of src degree
-        h = h * self.g.ndata['norm']
-        self.g.ndata['h'] = h
-        self.g.update_all(fn.copy_src(src='h', out='m'),
-                          fn.sum(msg='m', out='h'))
-        h = self.g.ndata.pop('h')
-        # normalization by square root of dst degree
-        h = h * self.g.ndata['norm']
-        # bias
-        if self.bias is not None:
-            h = h + self.bias.data(h.context)
-        if self.activation:
-            h = self.activation(h)
-        return h
-
-class GCN(gluon.Block):
-    def __init__(self,
-                 g,
-                 in_feats,
-                 n_hidden,
-                 n_classes,
-                 n_layers,
-                 activation,
-                 dropout):
-        super(GCN, self).__init__()
-        self.layers = gluon.nn.Sequential()
-        # input layer
-        self.layers.add(GCNLayer(g, in_feats, n_hidden, activation, 0.))
-        # hidden layers
-        for i in range(n_layers - 1):
-            self.layers.add(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
-        # output layer
-        self.layers.add(GCNLayer(g, n_hidden, n_classes, None, dropout))
-
-
-    def forward(self, features):
-        h = features
-        for layer in self.layers:
-            h = layer(h)
-        return h
diff --git a/examples/pytorch/gcn/README.md b/examples/pytorch/gcn/README.md
index 5fa2e3c25c77..f76e88365815 100644
--- a/examples/pytorch/gcn/README.md
+++ b/examples/pytorch/gcn/README.md
@@ -19,8 +19,6 @@ Codes
-The folder contains three implementations of GCN:
+The folder contains two implementations of GCN:
 - `gcn.py` uses DGL's predefined graph convolution module.
 - `gcn_mp.py` uses user-defined message and reduce functions.
-- `gcn_spmv.py` improves from `gcn_mp.py` by using DGL's builtin functions
-  so SPMV optimization could be applied.
 Modify `train.py` to switch between different implementations.
 
 Results
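For context on the two `gcn_spmv.py` files removed by this patch: each expressed one graph-convolution step through DGL's builtin message and reduce functions, which DGL can fuse into a single sparse-dense matrix multiplication (SPMV) instead of materializing per-edge messages in Python. A minimal runnable sketch of that pattern, assuming a recent DGL release where `fn.copy_u` is the successor of the `fn.copy_src` call used in the deleted code, and a toy graph invented for illustration:

    import dgl
    import dgl.function as fn
    import torch

    # Toy 3-node directed cycle; any DGLGraph behaves the same way.
    g = dgl.graph(([0, 1, 2], [1, 2, 0]))
    g.ndata['h'] = torch.randn(3, 4)

    # Builtin message (copy source feature) plus builtin reduce (sum):
    # DGL recognizes this pair and runs the whole step as one SPMV kernel
    # instead of invoking user-defined Python callbacks per edge.
    g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))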
diff --git a/examples/pytorch/gcn/gcn_spmv.py b/examples/pytorch/gcn/gcn_spmv.py
deleted file mode 100644
index f4af486d9b5d..000000000000
--- a/examples/pytorch/gcn/gcn_spmv.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""GCN using builtin functions that enables SPMV optimization.
-
-References:
-- Semi-Supervised Classification with Graph Convolutional Networks
-- Paper: https://arxiv.org/abs/1609.02907
-- Code: https://github.com/tkipf/gcn
-"""
-import math
-import torch
-import torch.nn as nn
-import dgl.function as fn
-
-class GCNLayer(nn.Module):
-    def __init__(self,
-                 g,
-                 in_feats,
-                 out_feats,
-                 activation,
-                 dropout,
-                 bias=True):
-        super(GCNLayer, self).__init__()
-        self.g = g
-        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
-        if bias:
-            self.bias = nn.Parameter(torch.Tensor(out_feats))
-        else:
-            self.bias = None
-        self.activation = activation
-        if dropout:
-            self.dropout = nn.Dropout(p=dropout)
-        else:
-            self.dropout = 0.
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        stdv = 1. / math.sqrt(self.weight.size(1))
-        self.weight.data.uniform_(-stdv, stdv)
-        if self.bias is not None:
-            self.bias.data.uniform_(-stdv, stdv)
-
-    def forward(self, h):
-        if self.dropout:
-            h = self.dropout(h)
-        h = torch.mm(h, self.weight)
-        # normalization by square root of src degree
-        h = h * self.g.ndata['norm']
-        self.g.ndata['h'] = h
-        self.g.update_all(fn.copy_src(src='h', out='m'),
-                          fn.sum(msg='m', out='h'))
-        h = self.g.ndata.pop('h')
-        # normalization by square root of dst degree
-        h = h * self.g.ndata['norm']
-        # bias
-        if self.bias is not None:
-            h = h + self.bias
-        if self.activation:
-            h = self.activation(h)
-        return h
-
-class GCN(nn.Module):
-    def __init__(self,
-                 g,
-                 in_feats,
-                 n_hidden,
-                 n_classes,
-                 n_layers,
-                 activation,
-                 dropout):
-        super(GCN, self).__init__()
-        self.layers = nn.ModuleList()
-        # input layer
-        self.layers.append(GCNLayer(g, in_feats, n_hidden, activation, 0.))
-        # hidden layers
-        for i in range(n_layers - 1):
-            self.layers.append(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
-        # output layer
-        self.layers.append(GCNLayer(g, n_hidden, n_classes, None, dropout))
-
-    def forward(self, features):
-        h = features
-        for layer in self.layers:
-            h = layer(h)
-        return h
diff --git a/tests/scripts/task_example_test.bat b/tests/scripts/task_example_test.bat
index aa7085231ed9..2eac8dfcd2c3 100644
--- a/tests/scripts/task_example_test.bat
+++ b/tests/scripts/task_example_test.bat
@@ -25,7 +25,6 @@ SET DGL_DOWNLOAD_DIR=!CD!
 PUSHD !GCN_EXAMPLE_DIR!
 python pagerank.py || GOTO :FAIL
 python gcn\gcn.py --dataset cora --gpu !DEV! || GOTO :FAIL
-python gcn\gcn_spmv.py --dataset cora --gpu !DEV! || GOTO :FAIL
 POPD
 ENDLOCAL
 EXIT /B
diff --git a/tests/scripts/task_example_test.sh b/tests/scripts/task_example_test.sh
index f32051cd79fc..c441ebc34f3f 100644
--- a/tests/scripts/task_example_test.sh
+++ b/tests/scripts/task_example_test.sh
@@ -40,6 +40,5 @@
 pushd $GCN_EXAMPLE_DIR> /dev/null
 
 python3 pagerank.py || fail "run pagerank.py on $1"
 python3 gcn/gcn.py --dataset cora --gpu $dev || fail "run gcn/gcn.py on $1"
-python3 gcn/gcn_spmv.py --dataset cora --gpu $dev || fail "run gcn/gcn_spmv.py on $1"
-popd > /dev/null
\ No newline at end of file
+popd > /dev/null
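The `gcn.py` examples kept by this patch use DGL's predefined graph convolution module, which performs the same normalized propagation with builtin functions internally. A hedged sketch of that path, where the toy graph and feature shapes are assumptions for illustration; `norm='both'` applies the symmetric D^-1/2 A D^-1/2 normalization of Kipf & Welling that the deleted files implemented by hand via the per-node 'norm' field:

    import dgl
    import torch
    import torch.nn.functional as F
    from dgl.nn import GraphConv

    # Toy graph with self-loops added, mirroring the --self-loop flag
    # used in the README commands above.
    g = dgl.add_self_loop(dgl.graph(([0, 1, 2], [1, 2, 0])))
    feats = torch.randn(3, 4)

    # GraphConv multiplies by 1/sqrt(degree) on both the source and the
    # destination side, then aggregates neighbors with a fused SPMV.
    conv = GraphConv(4, 2, norm='both', activation=F.relu)
    out = conv(g, feats)   # shape: (3, 2)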