From 1662ef25390f9627e0264024b0046844b30717a7 Mon Sep 17 00:00:00 2001
From: sara castellano <sara@biomage.net>
Date: Tue, 26 Apr 2022 16:12:41 +0200
Subject: [PATCH 1/3] add script to demultiplex sparse matrix

---
 demultiplex_sparse_matrix.R | 47 +++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 demultiplex_sparse_matrix.R

diff --git a/demultiplex_sparse_matrix.R b/demultiplex_sparse_matrix.R
new file mode 100644
index 0000000..cd4032d
--- /dev/null
+++ b/demultiplex_sparse_matrix.R
@@ -0,0 +1,47 @@
+# set working directory (replace the placeholder path with the folder that contains the rds file; output is also written there) and load rds object
+  setwd("/Your/Directory")
+  data <- readRDS("1867-counts_cells_cohort2.rds")
+
+# load required package
+  library("DropletUtils")
+
+# look at data structure
+  str(data)
+# we can see that this is a dgCMatrix with gene symbols as rownames and cell barcodes as colnames
+# let's look at the first cell barcode as an example (BIOKEY_13_Pre_AAACCTGCAACAACCT-1)
+  colnames(data)[1]
+# in this particular case, cell barcodes consist of a prefix (BIOKEY_13_Pre_) and a sequence of bases (AAACCTGCAACAACCT-1)
+# the prefixes are sample names, so we'll use them to demultiplex the data
+
+# demultiplex data based on barcode prefixes and export as 10X files
+  # use a regular expression to extract prefixes: the trailing "_<uppercase letters>-1" is removed and everything before it is kept (colnames that do not match are left unchanged)
+  data.pfx <- gsub("(.+)_[A-Z]+-1$", "\\1", colnames(data), perl=TRUE)
+  # get unique sample names
+  data.samples <- unique(data.pfx)
+  # check sample names (pay attention if you are using this script to process a different dataset, as the regular expression may need to be modified depending on the specific colnames)
+  head(data.samples)
+  tail(data.samples)
+
+  # export as 10X files that can be directly uploaded to Cellenics
+  # demultiplex_convert_to_10x(obj, samples): creates a subdirectory named "demultiplexed" inside the current working directory and saves 10X data for each sample in its own subfolder
+    # obj: count matrix with cell barcodes as colnames, expected as <sample>_<barcode>; samples: character vector of sample names (barcode prefixes)
+    # if a folder named "demultiplexed" already exists, it will stop and return an error to avoid overwriting files
+  demultiplex_convert_to_10x <- function(obj, samples) {
+    out_dir <- file.path(getwd(), "demultiplexed")
+    if (dir.exists(out_dir)) {
+      stop("A demultiplexed directory already exists: ", out_dir, call. = FALSE)
+    }
+    dir.create(out_dir)
+    for (sample_name in samples) {
+      print(paste0("Converting sample ", sample_name))
+      cells <- colnames(obj) == sample_name | startsWith(colnames(obj), paste0(sample_name, "_")) # literal prefix + "_" separator: "S1" must not select "S10_..." cells
+      DropletUtils::write10xCounts(path = file.path(out_dir, sample_name), x = obj[, cells, drop = FALSE], type = "sparse", version = "3")
+    }
+  }
+
+  # run the function on the full matrix, writing one 10X folder per sample name in data.samples
+  demultiplex_convert_to_10x(obj = data, samples = data.samples)
+
+
+
+

From 60a776fd31cfffd795f6ce3e7fafd9f07c9b8a5e Mon Sep 17 00:00:00 2001
From: sara castellano <sara@biomage.net>
Date: Tue, 26 Apr 2022 16:33:58 +0200
Subject: [PATCH 2/3] test

---
 demultiplex_sparse_matrix.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demultiplex_sparse_matrix.R b/demultiplex_sparse_matrix.R
index cd4032d..fcfdaa8 100644
--- a/demultiplex_sparse_matrix.R
+++ b/demultiplex_sparse_matrix.R
@@ -4,7 +4,7 @@
 
 # load required package
   library("DropletUtils")
-
+# comment
 # look at data structure
   str(data)
 # we can see that this is a dgCMatrix with gene symbols as rownames and cell barcodes as colnames

From ec3bbbda3334aaa02f85c717826a4608a43fc106 Mon Sep 17 00:00:00 2001
From: sara castellano <sara@biomage.net>
Date: Tue, 26 Apr 2022 16:42:39 +0200
Subject: [PATCH 3/3] test changes

---
 demultiplex_sparse_matrix.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/demultiplex_sparse_matrix.R b/demultiplex_sparse_matrix.R
index fcfdaa8..be5a2a1 100644
--- a/demultiplex_sparse_matrix.R
+++ b/demultiplex_sparse_matrix.R
@@ -5,6 +5,7 @@
 # load required package
   library("DropletUtils")
 # comment
+# comment 2
 # look at data structure
   str(data)
 # we can see that this is a dgCMatrix with gene symbols as rownames and cell barcodes as colnames