seurat_analysis.Rmd

---
title: "seurat_analysis"
author: "Irzam Sarfraz"
date: "2023-02-12"
output: html_document
---

```{r}
.libPaths("/projectnb/paxlab/isarfraz/RProjects/libs")

library(ggplot2)
library(Seurat)
library(Signac)
library(EnsDb.Hsapiens.v86)
library(cowplot)
# library(SeuratData)
library(SingleCellExperiment)
# InstallData("pbmcMultiome")

# pbmc.rna <- LoadData("pbmcMultiome", "pbmc.rna")
# pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac")
```

```{r}
# Load the Greenleaf MPAL hematopoiesis datasets. Each RDS file holds an object
# whose matrices are pulled out with assay().

# scATAC-seq (all samples): "counts" assay, used below as the peak matrix.
atac <- readRDS("/projectnb/paxlab/isarfraz/Greenleaf_Data_RDS/scATAC-All-Hematopoiesis-MPAL-191120.rds")
# Healthy-only alternative:
# atac <- readRDS("/projectnb/paxlab/isarfraz/Greenleaf_Data_RDS/scATAC-Healthy-Hematopoiesis-191120.rds")
peaks <- assay(atac, "counts")
# Plain Seurat object without a ChromatinAssay (kept for reference only):
# gl_atac_all <- CreateSeuratObject(counts = peaks, assay = "peaks", project = "10x_ATAC")

# scRNA-seq (all samples): "counts" assay wrapped in a Seurat object.
# NOTE: `rna` and `counts` are reassigned further down when the healthy-only
# scRNA reference is loaded; only `gl_rna_all` keeps the all-sample data.
rna <- readRDS("/projectnb/paxlab/isarfraz/Greenleaf_Data_RDS/scRNA-All-Hematopoiesis-MPAL-191120.rds")
counts <- assay(rna, "counts")
# The project label was "10x_ATAC", copy-pasted from the ATAC constructor
# above; this object holds RNA data, so it is labelled accordingly.
gl_rna_all <- CreateSeuratObject(counts = counts, assay = "RNA", project = "10x_RNA")
```

Process the scATAC-seq data with Signac.

```{r}
# Build the Signac object, following the Signac PBMC vignette:
# https://stuartlab.org/signac/articles/pbmc_vignette.html

# Row names of `peaks` are parsed as genomic regions using "_" as both
# separators (sep = c("_", "_")).
chrom_assay <- CreateChromatinAssay(sep = c("_", "_"), counts = peaks)
gl_atac_all <- CreateSeuratObject(assay = "peaks", counts = chrom_assay)

# Pull gene annotations from the EnsDb package and attach them to the object.
# NOTE(review): EnsDb.Hsapiens.v86 is a GRCh38 annotation with Ensembl-style
# sequence names; confirm the peaks use the same genome build and naming (the
# Signac vignette converts its annotation to UCSC style to match its data).
annotations <- GetGRangesFromEnsDb(EnsDb.Hsapiens.v86)
Annotation(gl_atac_all) <- annotations

# QC ----
# Skipped: per the author's note, all QC functions need fragment files.

# Normalization and dimension reduction ----
# TF-IDF, top-feature selection and SVD together make up LSI.
gl_atac_all <- RunTFIDF(object = gl_atac_all)
gl_atac_all <- FindTopFeatures(object = gl_atac_all, min.cutoff = 'q75')
gl_atac_all <- RunSVD(object = gl_atac_all)

# Check whether the first LSI component tracks technical variation (depth);
# if it does, it should be left out downstream. It seems like it might be here.
# Open question from the author: the vignette shows a negative correlation and
# drops the component -- does the same apply to a positive one?
DepthCor(object = gl_atac_all)

# UMAP embedding and clustering ----
# dims = 2:30 leaves out the first LSI component in both steps.
gl_atac_all <- RunUMAP(gl_atac_all, reduction = 'lsi', dims = 2:30)
gl_atac_all <- FindNeighbors(gl_atac_all, reduction = 'lsi', dims = 2:30)
# algorithm = 3 selects the SLM algorithm (see the FindClusters documentation).
gl_atac_all <- FindClusters(gl_atac_all, algorithm = 3, verbose = FALSE)
DimPlot(gl_atac_all, label = TRUE) + NoLegend()

# Gene activity ----
# GeneActivity() needs fragment files, so it is not run here:
# gene.activities <- GeneActivity(gl_atac_all)
# Instead, load the Greenleaf-processed (Cicero) gene activity object and keep
# only its "gA" assay.
# Healthy-only alternative file:
# gene.activities <- readRDS("/projectnb/paxlab/isarfraz/Greenleaf_Data_RDS/scATAC-Cicero-GA-Hematopoiesis-191120.rds")
gene.activities <- assay(
  readRDS("/projectnb/paxlab/isarfraz/Greenleaf_Data_RDS/scATAC-Cicero-GA-Hematopoiesis-MPAL-191120.rds"),
  "gA"
)

# Store the gene activity matrix on the Seurat object as a new 'RNA' assay.
# It goes into the `data` slot directly because, per the author's note, the
# scores are already normalized; the normalization step is therefore disabled:
# gl_atac_all <- NormalizeData(
#   object = gl_atac_all,
#   assay = 'RNA',
#   normalization.method = 'LogNormalize',
#   scale.factor = median(gl_atac_all$nCount_RNA)
# )
gl_atac_all[['RNA']] <- CreateAssayObject(data = gene.activities)

# Switch to the gene activity assay so the plots below show gene activity.
DefaultAssay(gl_atac_all) <- 'RNA'

# Basic cell-type markers (monocytes, B, T and NK cells), currently disabled:
# FeaturePlot(
#   object = gl_atac_all,
#   features = c('MS4A1', 'CD3D', 'LEF1', 'NKG7', 'TREM1', 'LYZ'),
#   pt.size = 0.1,
#   max.cutoff = 'q95',
#   ncol = 3
# )

# T-cell and B-cell markers: CD3E and CD79A.
FeaturePlot(
  gl_atac_all,
  features = c('CD3E', 'CD79A'),
  max.cutoff = 'q95',
  pt.size = 0.1,
  ncol = 3
)

# Note that the gene activities will be much noisier than scRNA-seq
# measurements. This is because they are derived from sparse chromatin data,
# and because they assume a general correspondence between gene body/promoter
# accessibility and gene expression, which may not always hold.

# Observation: the signal is very scattered. The pattern matches for similar
# genes, but the cells do not form separate clusters.
# Open question: use only the healthy samples, or follow what Greenleaf did?
###################
############


# scRNA-seq reference (healthy hematopoiesis) ----
# NOTE: this reassigns `rna` and `counts`, which earlier held the all-sample
# MPAL scRNA data.
rna <- readRDS("/projectnb/paxlab/isarfraz/Greenleaf_Data_RDS/scRNA-Healthy-Hematopoiesis-191120.rds")
counts <- assay(rna, "counts")
# The project label was "10x_ATAC", copy-pasted from the ATAC constructor;
# this object holds RNA data, so it is labelled accordingly.
rna <- CreateSeuratObject(counts = counts, assay = "RNA", project = "10x_RNA")

# Seurat workflow: normalize, select variable features, scale, reduce.
rna <- NormalizeData(rna)
rna <- FindVariableFeatures(rna, nfeatures = 3000)
rna <- ScaleData(rna)
# 100 PCs are computed, but only the first 30 are used below.
rna <- RunPCA(rna, npcs = 100)
rna <- RunTSNE(rna, dims = 1:30)
rna <- FindNeighbors(rna, dims = 1:30)
# algorithm = 3 selects the SLM algorithm (see the FindClusters documentation).
rna <- FindClusters(rna, resolution = 0.4, algorithm = 3)
# UMAP is computed from the 'RNA_nn' neighbor graph built by FindNeighbors()
# rather than from the PCA embedding.
# NOTE(review): graph-based UMAP presumably requires the Python umap-learn
# module to be available -- confirm on the target machine.
rna <- RunUMAP(rna, graph = 'RNA_nn', metric = 'euclidean')

# Same T-cell / B-cell markers (CD3E, CD79A) as plotted for the ATAC gene
# activities, for comparison.
FeaturePlot(
  object = rna,
  features = c('CD3E', 'CD79A'),
  pt.size = 0.1,
  max.cutoff = 'q95',
  ncol = 3
)

# Label transfer: anchors between the scRNA reference and the ATAC query ----
# The Signac vignette loads a pre-annotated reference at this point:
# pbmc_rna <- readRDS("../vignette_data/pbmc_10k_v3.rds")

# Both assays are named explicitly instead of relying on DefaultAssay(): the
# query must use its gene activity assay ('RNA'), not 'peaks', so that its
# features are genes shared with the reference. This matches what the defaults
# resolve to when the DefaultAssay() switch above has been run.
transfer.anchors <- FindTransferAnchors(
  reference = rna,
  query = gl_atac_all,
  reference.assay = "RNA",
  query.assay = "RNA",
  reduction = 'cca'
)

# TODO: the next step needs cell-type labels; none are attached to `rna` in
# the code shown here.


```