From 1662ef25390f9627e0264024b0046844b30717a7 Mon Sep 17 00:00:00 2001 From: sara castellano Date: Tue, 26 Apr 2022 16:12:41 +0200 Subject: [PATCH 1/3] add script to demultiplex sparse matrix --- demultiplex_sparse_matrix.R | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 demultiplex_sparse_matrix.R diff --git a/demultiplex_sparse_matrix.R b/demultiplex_sparse_matrix.R new file mode 100644 index 0000000..cd4032d --- /dev/null +++ b/demultiplex_sparse_matrix.R @@ -0,0 +1,47 @@ +# set working directory and load rds object + setwd("/Your/Directory") + data <- readRDS("1867-counts_cells_cohort2.rds") + +# load required package + library("DropletUtils") + +# look at data structure + str(data) +# we can see that this is a dgCMatrix with gene symbols as rownames and cell barcodes as colnames +# let's look at the first cell barcode as an example (BIOKEY_13_Pre_AAACCTGCAACAACCT-1) + colnames(data)[1] +# in this particular case, cell barcodes consist of a prefix (BIOKEY_13_Pre_) and a sequence of bases (AAACCTGCAACAACCT-1) +# the prefixes are sample names, so we'll use them to demultiplex the data + +# demultiplex data based on barcodes prefixes and export as 10X files + # use a regular expression to extract prefixes + data.pfx <- gsub("(.+)_[A-Z]+-1$", "\\1", colnames(data), perl=TRUE) + # get unique sample names + data.samples <- unique(data.pfx) + # check sample names (pay attention if you are using this script to process a different dataset as the regular expression may need to be modified depending on the specific colnames) + head(data.samples) + tail(data.samples) + + # export as 10X files that can be directly uploaded to Cellenics + # define the function + # the function creates a subdirectory named "demultiplexed" inside the current working directory, and saves 10X data for each sample in different subfolders + # if a folder named "demultiplexed" already exists, it will stop without writing anything, to avoid overwriting files + 
+  # demultiplex_convert_to_10x(obj, samples): for every entry of `samples`, writes the columns of `obj` whose names start with that entry as 10X files (sparse, version 3) to <working directory>/demultiplexed/<sample>
+  # obj: count matrix with cell barcodes as colnames; samples: character vector of barcode prefixes (sample names)
+  # stops with an error, before writing anything, if <working directory>/demultiplexed already exists
+  demultiplex_convert_to_10x <- function(obj, samples) {
+    out_dir <- file.path(getwd(), "demultiplexed")
+    if (dir.exists(out_dir)) stop("A demultiplexed directory already exists", call. = FALSE)
+    dir.create(out_dir)
+    for (sample in samples) {
+      message("Converting sample ", sample)
+      # startsWith() compares the prefix literally (no regex metacharacters); drop = FALSE keeps a one-cell sample as a matrix
+      cells <- startsWith(colnames(obj), sample)
+      DropletUtils::write10xCounts(path = file.path(out_dir, sample), x = obj[, cells, drop = FALSE],
+                                   type = "sparse", version = "3")
+    }
+  }
+
+  # run the function
+  demultiplex_convert_to_10x(obj = data, samples = data.samples)

From 60a776fd31cfffd795f6ce3e7fafd9f07c9b8a5e Mon Sep 17 00:00:00 2001
From: sara castellano
Date: Tue, 26 Apr 2022 16:33:58 +0200
Subject: [PATCH 2/3] test

---
 demultiplex_sparse_matrix.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demultiplex_sparse_matrix.R b/demultiplex_sparse_matrix.R
index cd4032d..fcfdaa8 100644
--- a/demultiplex_sparse_matrix.R
+++ b/demultiplex_sparse_matrix.R
@@ -4,7 +4,7 @@
 
 # load required package
   library("DropletUtils")
-
+# comment
 # look at data structure
   str(data)
 # we can see that this is a dgCMatrix with gene symbols as rownames and cell barcodes as colnames

From ec3bbbda3334aaa02f85c717826a4608a43fc106 Mon Sep 17 00:00:00 2001
From: sara castellano
Date: Tue, 26 Apr 2022 16:42:39 +0200
Subject: [PATCH 3/3] test changes

---
 demultiplex_sparse_matrix.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/demultiplex_sparse_matrix.R b/demultiplex_sparse_matrix.R
index fcfdaa8..be5a2a1 100644
--- a/demultiplex_sparse_matrix.R
+++ b/demultiplex_sparse_matrix.R
@@ -5,6 +5,7 @@
 # load required package
   library("DropletUtils")
 # comment
+# comment 2
 # look at data structure
   str(data)
 # we can see that this is a dgCMatrix with gene symbols as rownames and cell barcodes as colnames