From 3b8e5b985fea96104379737aec56774a3dc87a74 Mon Sep 17 00:00:00 2001 From: Nikolai Karpov Date: Fri, 12 Apr 2024 16:00:01 -0400 Subject: [PATCH 1/2] update readme --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ca54e6a..fd714af 100644 --- a/README.md +++ b/README.md @@ -82,9 +82,15 @@ Note: Read ids can be obtained with seqkit: ```seqkit seq -ni > -t -d -m -b +herro inference --read-alns -t -d -m -b [-c ] ``` -Note: GPUs are specified using their IDs. For example, if the value of the parameter -d is set to 0,1,3, herro will use the first, second, and fourth GPU cards. Parameter ```-t``` is given **per device** - e.g., if ```-t``` is set to ```8``` and 3 GPUs are used, herro will create 24 feature generation theads in total. Recommended batch size is 64 for GPUs with 40 GB (possibly also for 32 GB) of VRAM and 128 for GPUs with 80 GB of VRAM. +Note: GPUs are specified using their IDs. For example, if the value of the parameter -d is set to 0,1,3, herro will use the first, second, and fourth GPU cards. Parameter ```-t``` is given **per device** - e.g., if ```-t``` is set to ```8``` and 3 GPUs are used, herro will create 24 feature generation threads in total. Recommended batch size is 64 for GPUs with 40 GB (possibly also for 32 GB) of VRAM and 128 for GPUs with 80 GB of VRAM. With the flag `-c`, the app gets information about the cluster and its neighborhood from the specified file. It then loads only the reads associated with the cluster and its neighborhood into RAM, and outputs corrected reads for the cluster. The file should contain multiple lines formatted as `0\t` for IDs within the cluster and `1\t` for IDs in the neighborhood. The partitioning into clusters can be generated using the command: + +```shell +zstdcat *.paf.zst | cut -f1,6 | python scripts/create_clusters.py +``` + +This command outputs files with clusters into the folder `clusters`. 
## Results on HG002 data From 3ef64d3a1d3fd499aec3696d6ae3203d76d87298 Mon Sep 17 00:00:00 2001 From: Nikolai Karpov Date: Mon, 15 Apr 2024 17:12:27 +0100 Subject: [PATCH 2/2] update extensions --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6620143..f7ead45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -214,7 +214,7 @@ fn parse_reads>(reads_path: P, window_size: u32, core: &Option = glob(&g).unwrap() .filter_map(|p| p.ok().and_then(|path| path.to_str().map(|s| s.to_owned()))) - .filter(|s| s.ends_with(".fastq") || s.ends_with(".fastq.gz")) + .filter(|s| s.ends_with(".fastq") || s.ends_with(".fastq.gz") || s.ends_with(".fq") || s.ends_with(".fq.gz")) .flat_map(|s| haec_io::get_reads(&s, window_size, core, neighbour)) .collect(); set_parse_reads_spinner_finish(reads.len(), spinner);