From 858dbaeac895e77f68e44fc32eed5f7fda9483b9 Mon Sep 17 00:00:00 2001
From: Wanwan Feng <453951123@qq.com>
Date: Tue, 24 Nov 2020 02:16:24 +0800
Subject: [PATCH] Update README.md

---
 README.md | 75 ++++++++++++++-----------------------------------------
 1 file changed, 19 insertions(+), 56 deletions(-)
diff --git a/README.md b/README.md
index 62629be..c46ce4f 100644
--- a/README.md
+++ b/README.md
@@ -83,14 +83,18 @@ unary_scale_factor=100
 label_cost=10
 algorithm='expansion'
 ff = 'README_file/Rep'+str(j)+'_MOB_count_matrix-1.tsv'
-locs,data=read_spatial_expression(ff,sep='\t',num_exp_genes=0.01, num_exp_spots=0.05, min_expression=1)
+locs,data,_=read_spatial_expression(ff,sep='\t',num_exp_genes=0.01, num_exp_spots=0.05, min_expression=1)
 
 data_norm = normalize_count_cellranger(data)
 print('Rep{}_processing: {}'.format(j,data_norm.shape))
 ```
 
-    raw data dim: (262, 16218)
-    Rep11_processing: (259, 12522)
+    raw data dim: (262, 16218)
+    Number of expressed genes a spot must have to be kept (0.01% of total expressed genes) 3375
+    Marked 3 spots
+    Removing genes that are expressed in less than 3 spots with a count of at least 1
+    Dropped 1559 genes
+    Rep11_processing: (262, 14659)
     
 
 ## **Step 2:**  
@@ -129,46 +133,6 @@ plt.title('CellGraph')
 
 ![png](README_file/output_9_1.png)
 
-<br><font size=3> Both Mouse olfactor bulb and Breast cancer data sets used in the analysis have missing spots, creating holes in the grid. We recomment the users to fill the missing spot, and impute the missing values using <mark><font color=red> **AddPoints_XY_and_update_data**</font></mark> function, for more accurate analysis results. <font>  
-
-
-```python
-locs_new,data_norm_new,newPoints=AddPoints_XY_and_update_data(locs,data_norm,cellGraph,axis=1)
-print('Filled {} points'.format(len(newPoints)))
-print('After filled points: ', data_norm_new.shape)
-```
-
-    Filled 6 points
-    After filled points:  (265, 12522)
-    
-
-
-```python
-fig, ax= plt.subplots(1,1,figsize=(5,5)) #, dpi=300)
-ax.set_aspect('equal')
-
-exp_new = data_norm_new.iloc[:,0].values
-cellGraph_new = create_graph_with_weight(locs_new, exp_new)
-ax.scatter(locs_new[:,0], locs_new[:,1], s=1, color='black')
-for i in np.arange(cellGraph_new.shape[0]):
-    x = (locs_new[int(cellGraph_new[i,0]), 0], locs_new[int(cellGraph_new[i,1]), 0]) 
-    y = (locs_new[int(cellGraph_new[i,0]), 1], locs_new[int(cellGraph_new[i,1]), 1])     
-    ax.plot(x, y, color='black', linewidth=0.5)
-    
-plt.title('New CellGraph')
-```
-
-
-
-
-    Text(0.5, 1.0, 'New CellGraph')
-
-
-
-
-![png](README_file/output_12_1.png)
-
-
 ## **Step3:**
 ### Gene expression classification via Gaussian mixture modeling
 
@@ -355,7 +319,7 @@ smooth_factor=20
 
 ff = '../../data/Raw_data/MOB-breast_cancer/Rep11_MOB_count_matrix-1.tsv' 
 # read in spatial gene expression data
-locs, data = read_spatial_expression(ff,sep='\t')
+locs, data, _ = read_spatial_expression(ff,sep='\t')
 
 # normalize gene expression
 data_norm = normalize_count_cellranger(data)
@@ -366,27 +330,26 @@ exp =  data_norm.iloc[:,0]
 # create graph representation of spatial coordinates of cells
 cellGraph = create_graph_with_weight(locs, exp)
 
-## Fill spots
-locs_new,data_norm_new,newPoints=AddPoints_XY_and_update_data(locs,data_norm,cellGraph,axis=1)
-
-# recreate new cellGraph after filled sopts.
-exp_new =  data_norm_new.iloc[:,0]
-cellGraph_new = create_graph_with_weight(locs_new, exp_new)
-
         
 # GMM 
-count = data_norm_new.loc[:,geneID].values
+count = data_norm.loc[:,geneID].values
 gmm=perform_gmm(count)
 
 # do graph cut
 temp_factor=smooth_factor
-newLabels = cut_graph_general(cellGraph_new, count, gmm, unary_scale_factor, 
-                                           temp_factor, label_cost, algorithm)
+newLabels, label_pred = cut_graph_general(cellGraph, count, gmm, unary_scale_factor, 
+                                        temp_factor, label_cost, algorithm)
+    
 # calculate p values
-p, node, com = compute_p_CSR(locs_new, newLabels, gmm, count, cellGraph_new)
+p, node, com = compute_p_CSR(locs, newLabels, gmm, count, cellGraph)
 
 # Visualize graph cut results
-plot_voronoi_boundary(geneID, locs_new, count,  newLabels, min(p)) 
+plot_voronoi_boundary(geneID, locs, count,  newLabels, min(p)) 
+
+# Optionally save the graph cut results to a PDF (uncomment to use):
+# pdf_voronoi_boundary(geneID, locs, count, newLabels, min(p),
+#                      fileName=None,  # e.g. '../../results/{}.pdf'.format(geneID)
+#                      point_size=0)
 
 ```