Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
logs
.snakemake
site
__pycache__
output
.tests/illumnia_demux/dry_run_out
.tests/dry_run_out
19 changes: 5 additions & 14 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,14 @@ Biowulf uses environmental modules to control software. After executing the abov
> [+] Loading singularity 4.X.X on cnXXXX

```bash title="<b>Bigsky</b>"
source /data/openomics/bin/dependencies.sh`
source /data/openomics/bin/source_weave.sh
```

Bigsky uses spack to load modules so a consolidated conda environment with snakemake is activated:
Bigsky uses the same Lua module loading system that Biowulf uses for weave.

```bash title="dependencies.sh"
if [ ! -x "$(command -v "snakemake")" ]; then
source /gs1/apps/user/rmlspack/share/spack/setup-env.sh
export PS1="${PS1:-}"
spack load -r miniconda3@4.11.0/y4vyh4u
source activate snakemake7-19-1
fi
# Add this folder to $PATH
export PATH="/data/openomics/bin:${PATH}"
# Add different pipelines to $PATH
export PATH="/data/openomics/prod/rna-seek/latest:${PATH}"
export PATH="/data/openomics/prod/metavirs/latest:${PATH}"
module load snakemake/7.22.0-ufanewz
pip install -r /data/openomics/prod/weave/latest/requirements.txt
```

Singularity, meanwhile, is installed on the **BigSky** system and is available upon login.
Expand All @@ -73,4 +64,4 @@ cd weave # git repository root
-s .tests/illumnia_demux \
-o .tests/illumnia_demux/dry_run_out \
--local --dry-run /opt2/.tests/illumnia_demux
```
```
20 changes: 10 additions & 10 deletions scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_current_server():
# biowulf hostnames
re_biowulf_head = (r"biowulf\.nih\.gov", "biowulf")
re_biowulf_compute = (r"cn\d{4}", "biowulf")

# skyline hostnames
re_skyline_head = (r"ai-hpc(submit|n)(\d+)?", "skyline")
re_skyline_compute = (r"ai-hpc(submit|n)(\d+)?", "skyline")
Expand All @@ -48,15 +48,15 @@ def get_current_server():
FRCE_PATH = "COVID-19_Consortium"


# ~~~ labkey configurations ~~~
# ~~~ labkey configurations ~~~
CONTEXT_PATH = "labkey"
LABKEY_CONFIGS = {
"bigsky": {"domain": BIGSKY_DEV, "container_path": BIGSKY_PATH, "context_path": CONTEXT_PATH, "use_ssl": True},
"frce": {"domain": FRCE_PROD, "container_path": FRCE_PATH, "context_path": CONTEXT_PATH, "use_ssl": True}
}


# ~~~ snakemake configurations ~~~
# ~~~ snakemake configurations ~~~
illumina_pipelines = defaultdict(lambda: Path(Path(__file__).parent.parent, "workflow", "Snakefile").resolve())
# can add support for NextSeq2k and bclconvert here
SNAKEFILE = {
Expand All @@ -69,7 +69,7 @@ def get_current_server():


def get_resource_config():
"""Return a dictionary containing server specific references utilized in
"""Return a dictionary containing server specific references utilized in
the workflow for directories or reference files.

Returns:
Expand Down Expand Up @@ -131,7 +131,7 @@ def get_bigsky_seq_dirs():
Returns:
(list): list of `pathlib.Path`s of all sequencing directories on bigsky server
"""
top_dir = Path("/gs1/RTS/NextGen/SequencerRuns/")
top_dir = Path("/data/rml_ngs/SequencerRuns/")
transfer_breadcrumb = "RTAComplete.txt"
if not top_dir.exists():
return None
Expand All @@ -141,7 +141,7 @@ def get_bigsky_seq_dirs():
for this_child_elem in this_dir.iterdir():
try:
elem_checks = [
this_child_elem.is_dir(),
this_child_elem.is_dir(),
Path(this_child_elem, transfer_breadcrumb).exists(),
check_access(this_child_elem, R_OK)
]
Expand All @@ -155,13 +155,13 @@ def get_bigsky_seq_dirs():
def get_tmp_dir(host):
TMP_CONFIGS = {
'skyline': {'user': '/data/scratch/$USER/$SLURM_JOBID', 'global': '/data/scratch/$USER/' + str(uuid4())},
'bigsky': {'user': '/gs1/Scratch/$USER/$SLURM_JOBID', 'global': '/gs1/Scratch/$USER/' + str(uuid4())},
'bigsky': {'user': '/data/scratch/$USER/$SLURM_JOBID', 'global': '/data/scratch/$USER/' + str(uuid4())},
'biowulf': {'user': '/lscratch/$SLURM_JOBID', 'global': '/tmp/$USER/' + str(uuid4())}
}

this_tmp = TMP_CONFIGS[host]['user']

# this directory, if it does not exist,
# this directory, if it does not exist,
if Path(this_tmp).parents[0].exists():
return this_tmp
else:
Expand All @@ -170,7 +170,7 @@ def get_tmp_dir(host):

DIRECTORY_CONFIGS = {
"bigsky": {
"seqroot": "/gs1/RTS/NextGen/SequencerRuns/",
"seqroot": "/data/rml_ngs/SequencerRuns",
"seq": get_bigsky_seq_dirs(),
"profile": Path(Path(__file__).parent.parent, "utils", "profiles", "bigsky").resolve(),
},
Expand Down Expand Up @@ -222,4 +222,4 @@ def get_tmp_dir(host):
"mesaur": "/data/openomics/references/genomes/mesaur/2.0/GCF_017639785.1_BCM_Maur_2.0_genomic.fna.gz",
"cynomac": "/data/openomics/references/genomes/cynomac/v2/GCF_012559485.2_MFA1912RKSv2_genomic.fna.gz",
},
}
}
16 changes: 8 additions & 8 deletions scripts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

def get_all_seq_dirs(top_dir, server):
"""
Gather and return all sequencing directories from the `top_dir`.
Gather and return all sequencing directories from the `top_dir`.
This is tightly coupled at the moment to the directory that is on RML-BigSky.
In the future will need to the take a look at how to do this more generally
"""
Expand Down Expand Up @@ -42,7 +42,7 @@ def valid_run_output(output_directory, dry_run=False):
output_directory = Path(output_directory).absolute()
if not output_directory.exists():
output_directory.mkdir(parents=True, mode=0o765)

if not check_access(output_directory, W_OK):
raise PermissionError(f'Can not write to output directory {output_directory}')
return output_directory
Expand Down Expand Up @@ -70,7 +70,7 @@ def valid_fasta(suspect):

if not is_valid:
raise ValueError

return suspect


Expand Down Expand Up @@ -147,7 +147,7 @@ def find_demux_dir(run_dir):

if len(demux_stat_files) != 1:
raise FileNotFoundError

return Path(demux_stat_files[0], '..').absolute()


Expand All @@ -160,7 +160,7 @@ def get_run_directories(runids, seq_dir=None, sheetname=None):
for secondchild in firstchild.iterdir():
seq_contents.append(secondchild)
seq_contents_names = [child for child in map(lambda d: d.name, seq_contents)]

run_paths, invalid_runs = [], []
run_return = []
for run in runids:
Expand Down Expand Up @@ -192,7 +192,7 @@ def get_run_directories(runids, seq_dir=None, sheetname=None):
sheet = Path(run_p, sheetname).absolute()
else:
raise FileNotFoundError(f'Run {rid}({run_p}) does not have a find-able sample sheet.')

this_run_info['samplesheet'] = parse_samplesheet(sheet)
this_run_info.update({info.tag: info.text for run in runinfo_xml.getroot() for info in run \
if info.text is not None and info.text.strip() not in ('\n', '')})
Expand All @@ -201,5 +201,5 @@ def get_run_directories(runids, seq_dir=None, sheetname=None):
if invalid_runs:
raise ValueError('Runs entered are invalid (missing sequencing artifacts or directory does not exist): \n' + \
', '.join(invalid_runs))
return run_return

return run_return
22 changes: 11 additions & 11 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def valid_run_input(run):


def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
# async execution w/ filter:
# async execution w/ filter:
# - https://gist.github.com/DGrady/b713db14a27be0e4e8b2ffc351051c7c
# - https://lysator.liu.se/~bellman/download/asyncproc.py
# - https://gist.github.com/kalebo/1e085ee36de45ffded7e5d9f857265d0
Expand All @@ -113,7 +113,6 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
popen_kwargs['cwd'] = cwd
else:
popen_kwargs['cwd'] = str(Path.cwd())

parent_jobid = None
if local or dry_run:
popen_kwargs['env'].update(os.environ)
Expand Down Expand Up @@ -170,9 +169,7 @@ def get_mods(init=False):
mod_cmd = []

if host == 'bigsky':
mod_cmd.append('source /gs1/apps/user/rmlspack/share/spack/setup-env.sh')
mod_cmd.append('spack load miniconda3@4.11.0')
mod_cmd.append('source activate snakemake7-19-1')
mod_cmd.append('module load snakemake/7.22.0-ufanewz')
elif host == 'skyline':
mod_cmd.append('source /data/openomics/bin/dependencies.sh')
elif host == 'biowulf':
Expand Down Expand Up @@ -220,8 +217,9 @@ def get_mounts(*extras):
raise FileNotFoundError(f"Can't mount {str(bind)}, it doesn't exist!")
file_to, file_from, mode = str(bind), str(bind), 'rw'
mounts.append(file_from + ':' + file_to + ':' + mode)

mounts.append(r'\$TMPDIR:/tmp:rw')

if 'TMPDIR' in os.environ:
mounts.append(os.environ['TMPDIR'] + ':/tmp:rw')

return ','.join(mounts)

Expand Down Expand Up @@ -265,13 +263,15 @@ def exec_pipeline(configs, dry_run=False, local=False):
top_env['PATH'] = os.environ["PATH"]
top_env['SNK_CONFIG'] = str(config_file.absolute())
top_env['SINGULARITY_CACHEDIR'] = str(Path(this_config['out_to'], '.singularity').absolute())
top_env['SINGULARITY_CONTAINALL'] = '1'
top_env['APPTAINER_CONTAINALL'] = '1'
this_cmd = [
"snakemake", "-p", "--use-singularity", "--rerun-incomplete", "--keep-incomplete",
"--rerun-triggers", "mtime", "--verbose", "-s", snake_file,
"snakemake", "-p", "--cores", "2", "--use-singularity", "--rerun-incomplete", "--keep-incomplete",
"--rerun-triggers", "mtime", "--verbose", "-s", str(snake_file),
]

if singularity_binds and not dry_run:
this_cmd.extend(["--singularity-args", f"\"--env 'TMPDIR=/tmp' -C -B '{singularity_binds}'\""])
this_cmd.extend(["--singularity-args", f"\"-B '{singularity_binds}'\""])

if dry_run:
print(f"{esc_colors.OKGREEN}> {esc_colors.ENDC}{esc_colors.UNDERLINE}Dry run{esc_colors.ENDC} " + \
Expand Down Expand Up @@ -314,4 +314,4 @@ def valid_host_pathogen_genomes(host, pathogen):
if not g2:
raise ValueError('Pathogen genome does not exist on the file system.')

return host, pathogen
return host, pathogen
2 changes: 1 addition & 1 deletion utils
4 changes: 2 additions & 2 deletions workflow/fastq.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ rule kaiju_annotation:
log: config['out_to'] + "/logs/" + config["project"] + "/kaiju/{sids}.log",
threads: 24
resources:
mem_mb = 220000,
mem_mb = 300000,
runtime = 60*24*2
shell:
"""
Expand Down Expand Up @@ -109,7 +109,7 @@ rule kraken_annotation:
log: config['out_to'] + "/logs/" + config["project"] + "/kraken/{sids}.log",
threads: 24
resources:
mem_mb = 220000,
mem_mb = 300000,
runtime = 60*24*2
shell:
"""
Expand Down