From 858dbaeac895e77f68e44fc32eed5f7fda9483b9 Mon Sep 17 00:00:00 2001
From: Wanwan Feng <453951123@qq.com>
Date: Tue, 24 Nov 2020 02:16:24 +0800
Subject: [PATCH] Update README.md
---
README.md | 75 ++++++++++++++-----------------------------------------
1 file changed, 19 insertions(+), 56 deletions(-)
diff --git a/README.md b/README.md
index 62629be..c46ce4f 100644
--- a/README.md
+++ b/README.md
@@ -83,14 +83,18 @@ unary_scale_factor=100
label_cost=10
algorithm='expansion'
ff = 'README_file/Rep'+str(j)+'_MOB_count_matrix-1.tsv'
-locs,data=read_spatial_expression(ff,sep='\t',num_exp_genes=0.01, num_exp_spots=0.05, min_expression=1)
+locs,data,_=read_spatial_expression(ff,sep='\t',num_exp_genes=0.01, num_exp_spots=0.05, min_expression=1)
data_norm = normalize_count_cellranger(data)
print('Rep{}_processing: {}'.format(j,data_norm.shape))
```
- raw data dim: (262, 16218)
- Rep11_processing: (259, 12522)
+raw data dim: (262, 16218)
+Number of expressed genes a spot must have to be kept (0.01% of total expressed genes) 3375
+Marked 3 spots
+Removing genes that are expressed in less than 3 spots with a count of at least 1
+Dropped 1559 genes
+Rep11_processing: (262, 14659)
## **Step 2:**
@@ -129,46 +133,6 @@ plt.title('CellGraph')
![png](README_file/output_9_1.png)
-
Both the mouse olfactory bulb and breast cancer data sets used in the analysis have missing spots, creating holes in the grid. We recommend that users fill the missing spots and impute the missing values using the **AddPoints_XY_and_update_data** function, for more accurate analysis results.
-
-
-```python
-locs_new,data_norm_new,newPoints=AddPoints_XY_and_update_data(locs,data_norm,cellGraph,axis=1)
-print('Filled {} points'.format(len(newPoints)))
-print('After filled points: ', data_norm_new.shape)
-```
-
- Filled 6 points
- After filled points: (265, 12522)
-
-
-
-```python
-fig, ax= plt.subplots(1,1,figsize=(5,5)) #, dpi=300)
-ax.set_aspect('equal')
-
-exp_new = data_norm_new.iloc[:,0].values
-cellGraph_new = create_graph_with_weight(locs_new, exp_new)
-ax.scatter(locs_new[:,0], locs_new[:,1], s=1, color='black')
-for i in np.arange(cellGraph_new.shape[0]):
- x = (locs_new[int(cellGraph_new[i,0]), 0], locs_new[int(cellGraph_new[i,1]), 0])
- y = (locs_new[int(cellGraph_new[i,0]), 1], locs_new[int(cellGraph_new[i,1]), 1])
- ax.plot(x, y, color='black', linewidth=0.5)
-
-plt.title('New CellGraph')
-```
-
-
-
-
- Text(0.5, 1.0, 'New CellGraph')
-
-
-
-
-![png](README_file/output_12_1.png)
-
-
## **Step3:**
### Gene expression classification via Gaussian mixture modeling
@@ -355,7 +319,7 @@ smooth_factor=20
ff = '../../data/Raw_data/MOB-breast_cancer/Rep11_MOB_count_matrix-1.tsv'
# read in spatial gene expression data
-locs, data = read_spatial_expression(ff,sep='\t')
+locs, data, _ = read_spatial_expression(ff,sep='\t')
# normalize gene expression
data_norm = normalize_count_cellranger(data)
@@ -366,27 +330,26 @@ exp = data_norm.iloc[:,0]
# create graph representation of spatial coordinates of cells
cellGraph = create_graph_with_weight(locs, exp)
-## Fill spots
-locs_new,data_norm_new,newPoints=AddPoints_XY_and_update_data(locs,data_norm,cellGraph,axis=1)
-
-# recreate new cellGraph after filled sopts.
-exp_new = data_norm_new.iloc[:,0]
-cellGraph_new = create_graph_with_weight(locs_new, exp_new)
-
# GMM
-count = data_norm_new.loc[:,geneID].values
+count = data_norm.loc[:,geneID].values
gmm=perform_gmm(count)
# do graph cut
temp_factor=smooth_factor
-newLabels = cut_graph_general(cellGraph_new, count, gmm, unary_scale_factor,
- temp_factor, label_cost, algorithm)
+newLabels, label_pred = cut_graph_general(cellGraph, count, gmm, unary_scale_factor,
+ temp_factor, label_cost, algorithm)
+
# calculate p values
-p, node, com = compute_p_CSR(locs_new, newLabels, gmm, count, cellGraph_new)
+p, node, com = compute_p_CSR(locs, newLabels, gmm, count, cellGraph)
# Visualize graph cut results
-plot_voronoi_boundary(geneID, locs_new, count, newLabels, min(p))
+plot_voronoi_boundary(geneID, locs, count, newLabels, min(p))
+
+# save the graph cut results to pdf
+# pdf_voronoi_boundary(geneID, locs, count, newLabels, min(p),
+# fileName=None, # '../../results/{}.pdf'.format(geneID),
+# point_size=0)
```