| 77 | As of 2020 there are more than a dozen algorithms available for integrating single cell RNA-seq data-sets. Three such methods are canonical correlation analysis (implemented in Seurat), iterative linear correction based on soft clustering (implemented in Harmony) and integrative nonnegative matrix factorization (implemented in LIGER). Commands for using each of these methods from within a Seurat workflow are given below. |
| 78 | |
| 79 | * Using CCA in Seurat (please see T. Stuart ''et al.'' “Comprehensive Integration of Single-Cell Data”, ''Cell'' '''177''', 1888-1902 (2019), the associated Seurat v.3 vignette and the documentation for the FindIntegrationAnchors function): |
| 80 | {{{ |
| 81 | library(Seurat) |
| 82 | |
| 83 | # Merge two or more Seurat objects, objA and objB, from different batches. |
| 84 | all <- merge(x = objA, y = objB, add.cell.ids = c("A", "B")) |
| 85 | |
| 86 | # Split the merged object into one object per batch. NOTE: this assumes objA and objB already carry a "batch" metadata field - confirm for your data. |
| 87 | s3.list <- SplitObject(all, split.by = "batch") |
| 88 | |
| 89 | # Normalize each batch and pick its variable features separately (seq_along() also behaves correctly for an empty list). |
| 90 | for (i in seq_along(s3.list)) { |
| 91 | s3.list[[i]] <- NormalizeData(s3.list[[i]], verbose = FALSE) |
| 92 | s3.list[[i]] <- FindVariableFeatures(s3.list[[i]], selection.method = "vst", nfeatures = 2000, verbose = FALSE) |
| 93 | } |
| 94 | |
| 95 | # Find the integration anchors between batches, integrate the data with them, and make the "integrated" assay the default for downstream steps. |
| 96 | s3.anchors <- FindIntegrationAnchors(object.list = s3.list) |
| 97 | s3.integrated <- IntegrateData(anchorset = s3.anchors) |
| 98 | DefaultAssay(s3.integrated) <- "integrated" |
| 99 | }}} |
| 100 | |
| 101 | * Using Harmony from within Seurat (please see I. Korsunsky ''et al.'' “Fast, sensitive and accurate integration of single-cell data with Harmony”, ''Nature Methods'' '''16''', 1289-1296 (2019) and the documentation for the RunHarmony function): |
| 102 | {{{ |
| 103 | library(Seurat) |
| 104 | library(harmony) |
| 105 | |
| 106 | # Merge two or more Seurat objects, objA and objB, from different batches. |
| 107 | all <- merge(x = objA, y = objB, add.cell.ids = c("A", "B")) |
| 108 | |
| 109 | # In anticipation of using Harmony to integrate data-sets below, first use Seurat to run PCA on the un-corrected data. |
| 110 | |
| 111 | all <- NormalizeData(all, normalization.method = "LogNormalize", scale.factor = 10000) |
| 112 | all <- FindVariableFeatures(all, selection.method = "vst", nfeatures = 2000) |
| 113 | all <- ScaleData(all, features = rownames(all)) |
| 114 | all <- RunPCA(all, features = VariableFeatures(object = all)) |
| 115 | |
| 116 | # Do the integration using Harmony, indexing samples by the batch slot (assumes a "batch" metadata field is present - confirm for your data): |
| 117 | all <- RunHarmony(all, "batch") |
| 118 | |
| 119 | # When generating UMAP or another embedding, be sure to use the integrated "harmony" reduction. RunUMAP must be told which dimensions to use; here all Harmony dimensions are passed. |
| 120 | all <- RunUMAP(all, reduction = "harmony", dims = 1:ncol(all[["harmony"]])) |
| 121 | }}} |
| 122 | |
| 123 | * Using LIGER (v 0.4.2.9000) from within Seurat (please see J.D. Welch ''et al.'' “Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity”, ''Cell'' '''177''', 1873–1887 (2019) and the documentation for the RunOptimizeALS and RunQuantileAlignSNF functions): |
| 124 | {{{ |
| 125 | library(Seurat) |
| 126 | library(SeuratWrappers) |
| 127 | library(liger) |
| 128 | |
| 129 | # Merge two or more Seurat objects, objA and objB, from different batches. |
| 130 | all <- merge(x = objA, y = objB, add.cell.ids = c("A", "B")) |
| 131 | |
| 132 | # In anticipation of using LIGER to integrate data-sets below, first use Seurat to scale the data without centering. |
| 133 | |
| 134 | all <- NormalizeData(all, normalization.method = "LogNormalize", scale.factor = 10000) |
| 135 | all <- FindVariableFeatures(all, selection.method = "vst", nfeatures = 2000) |
| 136 | all <- ScaleData(all, do.center = FALSE, split.by = "batch") |
| 137 | |
| 138 | # Do the integration using LIGER, indexing samples by the batch slot. The number of factors k has to be given explicitly; 20 is only a starting value and should be tuned for your data: |
| 139 | all <- RunOptimizeALS(all, k = 20, split.by = "batch") |
| 140 | all <- RunQuantileAlignSNF(all, split.by = "batch") |
| 141 | |
| 142 | # When generating UMAP or another embedding, be sure to use the reduction from integrative nonnegative matrix factorization ("iNMF"). |
| 143 | all <- RunUMAP(all, dims = 1:ncol(all[["iNMF"]]), reduction = "iNMF") |
| 144 | }}} |