From 3ae11e58f4f3ae8764f92981557771674f8fee6f Mon Sep 17 00:00:00 2001 From: Mitchob Date: Wed, 10 Jun 2026 10:56:27 +1000 Subject: [PATCH 1/3] splitting out documentation into separate pages --- _data/sidebars/main.yml | 46 +++-- pages/flavours.md | 110 ++++++++++++ pages/interactive.md | 56 +++++++ pages/nextflow.md | 56 +++++++ pages/tools.md | 197 ++++++++++++++++++++++ pages/using-bioshell.md | 361 +--------------------------------------- 6 files changed, 462 insertions(+), 364 deletions(-) create mode 100644 pages/flavours.md create mode 100644 pages/interactive.md create mode 100644 pages/nextflow.md create mode 100644 pages/tools.md diff --git a/_data/sidebars/main.yml b/_data/sidebars/main.yml index 0686c9e..0c719f5 100644 --- a/_data/sidebars/main.yml +++ b/_data/sidebars/main.yml @@ -16,19 +16,43 @@ subitems: url: /using-bioshell subitems: - title: Choosing a flavour - url: /using-bioshell#flavours - - title: Reproducible tools - url: /using-bioshell#tools - - title: Bio-Shelley - url: /using-bioshell#bio-shelley - - title: Reference datasets - url: /using-bioshell#reference-data + url: /flavours + subitems: + - title: Flavour types + url: /flavours#flavour-types + - title: Quick sizing guide + url: /flavours#sizing-guide + - title: Worked examples + url: /flavours#worked-examples + - title: How to choose + url: /flavours#how-to-choose + - title: Tools and reference data + url: /tools + subitems: + - title: Verify CVMFS + url: /tools#cvmfs + - title: Find and install with Bio-Shelley + url: /tools#bio-shelley + - title: Load a tool + url: /tools#load-tool + - title: Reference datasets + url: /tools#reference-data + - title: Advanced — manual SHPC + url: /tools#shpc-manual - title: Interactive environments - url: /using-bioshell#interactive + url: /interactive + subitems: + - title: JupyterLab + url: /interactive#jupyterlab + - title: RStudio + url: /interactive#rstudio - title: Using Nextflow with CVMFS - url: /using-bioshell#nextflow - - title: Advanced 
— manual SHPC - url: /using-bioshell#shpc-manual + url: /nextflow + subitems: + - title: CVMFS containers + url: /nextflow#cvmfs-containers + - title: SHPC modules + url: /nextflow#shpc-module - title: BioShell in practice url: /community subitems: diff --git a/pages/flavours.md b/pages/flavours.md new file mode 100644 index 0000000..80263ab --- /dev/null +++ b/pages/flavours.md @@ -0,0 +1,110 @@ +--- +title: Choosing a flavour +description: How to select the right combination of vCPUs and memory for your BioShell environment. +--- + +A flavour is the combination of virtual CPUs (vCPUs) and memory (RAM) allocated to your +BioShell environment. BioShell is a shared national resource, so you are encouraged to +request a flavour that closely matches your actual needs — this keeps capacity available for +everyone. + +Estimating your requirements can be hard at the start of a project. Use the guidance below +to make a reasonable starting choice. You can always request a larger flavour later if your +analysis grows. 
+ +--- + +## Flavour types at a glance {#flavour-types} + +| Type | Characteristics | Best for | +|------|----------------|----------| +| **t3** | Low memory relative to CPUs | Testing, small jobs, getting started | +| **m3** | Balanced CPU and memory | Most general-purpose workloads | +| **c3** | CPU-optimised (same memory ratio as m3) | Compute-heavy tasks such as alignment and assembly | +| **r3** | High memory per CPU | Memory-intensive analysis such as variant calling or large data processing | + +### Available flavours + +| vCPUs | RAM (GB) | Nectar flavours | Nirin flavours | +|-------|----------|----------------|----------------| +| 1 | 1 | t3.xsmall | c3.1c1m5d | +| 1 | 2 | m3.xsmall, c3.xsmall | c3.1c2m10d | +| 1 | 4 | r3.xsmall | — | +| 2 | 4 | m3.small, c3.small | c3.2c4m20d, c3.2c4m10d | +| 2 | 8 | r3.small | — | +| 4 | 8 | m3.medium, c3.medium | c3.4c8m20d, c3.4c8m10d | +| 4 | 16 | r3.medium | — | +| 8 | 16 | m3.large, c3.large | c3.8c16m20d, c3.8c16m10d | +| 8 | 32 | r3.large | — | +| 16 | 32 | m3.xlarge, c3.xlarge | — | +| 16 | 64 | r3.xlarge | — | +| 32 | 64 | m3.xxlarge, c3.xxlarge | — | +| 32 | 128 | r3.xxlarge | — | +| 64 | 128 | c3.3xlarge | — | + +--- + +## Quick sizing guide {#sizing-guide} + +| Task type | Suggested resources | +|-----------|-------------------| +| Light preprocessing (QC, trimming, filtering) | 1–4 vCPUs, 2–8 GB RAM | +| Alignment and assembly (e.g. `bwa`, `STAR`, `SPAdes`) | 8–16 vCPUs, 16–32 GB RAM | +| Memory-intensive analysis (variant calling, genome-wide statistics) | 16–32+ vCPUs, 32–128 GB RAM | +| Interactive analysis and visualisation (JupyterLab, RStudio) | 2–4 vCPUs, 8–16 GB RAM | + +--- + +## Worked examples {#worked-examples} + +### Light processing + +John is starting a project analysing drought-resistant genes from 20 crop samples (~7 GB +raw sequencing data per sample). 
His pipeline includes quality control with `FastQC`, +adapter trimming, alignment to a reference genome, annotation, and phylogenetic tree +construction. + +The most resource-intensive steps require moderate CPU and memory. Because John is analysing +a subset of genes rather than whole genomes, each run is relatively small: + +- 2–4 vCPUs +- Up to 10 GB RAM + +A balanced `m3` flavour is a good starting point: + +- **Nectar:** `m3.medium` (4 vCPUs / 8 GB RAM) +- **Nirin:** `c3.4c8m10d` or `c3.4c8m20d` (4 vCPUs / 8 GB RAM) + +### Memory-intensive processing + +Georgie is running `GATK4` best-practice variant calling on 15 human exome samples. Each +sample produces BAM files of ~15 GB, with similar-sized temporary files during processing. +Total storage is approximately 1 TB. `GATK4` tools benefit from both high memory and +multiple CPU cores: + +- 8 vCPUs +- 32 GB RAM + +Recommended flavours for ~15 exomes: + +- **Nectar:** `r3.large` (8 vCPUs / 32 GB RAM) +- **Nirin:** `c3.8c16m20d` or `c3.8c16m10d` (8 vCPUs / 16 GB RAM — the largest Nirin flavour listed; note this is below the 32 GB estimated above) + +If processing ~30 exomes or running multiple jobs in parallel, consider `r3.xlarge` +(16 vCPUs / 64 GB RAM) on Nectar, or multiple 8 vCPU Nirin instances. + +--- + +## How to choose if you are unsure {#how-to-choose} + +1. **Check software documentation** — most bioinformatics tools publish minimum and + recommended system requirements. +2. **Start small and scale up** — begin with a smaller flavour. If jobs run slowly with + CPU usage consistently near 100%, you need more vCPUs. If jobs fail with memory errors, + you need more RAM. +3. **Review previous runs** — if you have run similar analyses before, check your peak CPU + and RAM usage from those logs to guide your estimate. + +> **Note:** For interactive work in JupyterLab or RStudio, 2–4 vCPUs and 8–16 GB RAM is +> sufficient for most datasets. Increase memory if you are loading large files directly +> into your session. 
diff --git a/pages/interactive.md b/pages/interactive.md new file mode 100644 index 0000000..c0e35b0 --- /dev/null +++ b/pages/interactive.md @@ -0,0 +1,56 @@ +--- +title: Interactive environments +description: How to use JupyterLab and RStudio browser-based coding environments in BioShell. +--- + +BioShell supports two browser-based interactive environments for notebook and script-based +work. Both run on your BioShell instance and are accessible through your browser once you +have connected via SSH. + +> **Important:** You must have an active SSH connection to your BioShell instance before +> opening either environment in your browser. See [Connecting to BioShell](/access#connecting). + +--- + +## JupyterLab {#jupyterlab} + +JupyterLab is a browser-based environment for writing and running Python, R, and shell +notebooks. It is well suited to exploratory analysis, visualisation, and combining code +with documentation. + +Open your browser and go to: + +``` +http://<instance-ip>:8888 +``` + +![](images/bioshell/SCREENSHOT_NEEDED_jupyterlab.png) + +*Fig 4. JupyterLab open in a browser connected to a BioShell instance.* + +--- + +## RStudio {#rstudio} + +RStudio is a browser-based environment for R-based analysis. If you are familiar with the +RStudio desktop application, the browser version works in exactly the same way. + +Open your browser and go to: + +``` +http://<instance-ip>:8787 +``` + +![](images/bioshell/SCREENSHOT_NEEDED_rstudio.png) + +*Fig 5. RStudio open in a browser connected to a BioShell instance.* + +--- + +> **Tip:** Your SSH terminal and browser environments connect to the same BioShell instance. +> Any tool you have loaded with `module load` in your terminal is also available inside +> JupyterLab and RStudio. + +> **Note:** If you cannot reach JupyterLab or RStudio in your browser, check that your SSH +> connection is still active. If you are connecting from outside your institution's network, +> you may need an SSH tunnel. 
Contact your local IT support for help with this. diff --git a/pages/nextflow.md b/pages/nextflow.md new file mode 100644 index 0000000..3f6d72f --- /dev/null +++ b/pages/nextflow.md @@ -0,0 +1,56 @@ +--- +title: Using Nextflow with CVMFS +description: How to run Nextflow workflows on BioShell using containers already available in CVMFS instead of downloading them. +--- + +If you are running a Nextflow workflow, you can direct processes to use containers already +in CVMFS rather than downloading them. This saves both time and storage. + +Nextflow runs each workflow process in its own environment. When using containers, it normally +pulls the required image automatically. On BioShell, you can override this behaviour by +creating a config file that points processes to the existing CVMFS images. + +> **Note:** Nextflow always prioritises a `-config` file passed on the command line over the +> workflow's built-in `nextflow.config`. + +--- + +## Using containers directly from CVMFS {#cvmfs-containers} + +Create a config file (e.g. `cvmfs_path.config`) that points a process to the container +stored in CVMFS: + +```groovy +process { + withName: 'FASTQC' { + container = 'file:///cvmfs/singularity.galaxyproject.org/all/fastqc:0.12.1--hdfd78af_0' + } +} +``` + +--- + +## Using an installed SHPC module {#shpc-module} + +Alternatively, if you have already installed a tool via Bio-Shelley or SHPC, you can +reference it by module name instead of a container path: + +```groovy +process { + withName: 'FASTQC' { + module = 'fastqc/0.11.9' + } +} +``` + +> **Note:** The `module` value must match exactly what appears in `module avail`. 
+ +--- + +## Running with your config override {#run} + +Pass your config file on the command line when running the workflow: + +```bash +nextflow run main.nf -profile singularity -config cvmfs_path.config +``` diff --git a/pages/tools.md b/pages/tools.md new file mode 100644 index 0000000..3ce2cd7 --- /dev/null +++ b/pages/tools.md @@ -0,0 +1,197 @@ +--- +title: Tools and reference data +description: How to find, install, and load bioinformatics tools and reference datasets in BioShell using CVMFS, Bio-Shelley, and SHPC. +--- + +BioShell gives you access to thousands of bioinformatics tools and reference datasets through +[CernVM-FS (CVMFS)](https://cvmfs.readthedocs.io/en/stable/), a read-only shared file system +that delivers containerised tools and reference data on demand. Rather than downloading or +installing software yourself, you load tools as modules — they are pulled from CVMFS only when +you need them. + +Through CVMFS, BioShell connects to three repositories: + +| Repository | Contents | +|-----------|----------| +| `singularity.galaxyproject.org` | Containerised tools from [BioContainers](https://biocontainers.pro/) | +| `data.galaxyproject.org` | Reference genome builds and pre-built indexes from the Galaxy Project | +| `data.biocommons.aarnet.edu.au` | [AUTHOR TO SUPPLY — confirm final name and description] | + +> **Note:** CVMFS does not download everything in advance. It fetches files only when you +> access them, so the first time you use a tool or dataset it may take a moment to load. +> Subsequent access is faster because the files are cached locally. + +--- + +## Step 1 — Verify your CVMFS connection {#cvmfs} + +When you first connect to BioShell you will see the following welcome message: + +``` +################################################################## +# # +# Welcome to BioShell! # +# # +# A range of commonly used bioinformatics software is available # +# via CVMFS. 
# +# # +# To make the software folders visible, run: # +# cvmfs_config probe # +# # +# If you need help finding and installing software, # +# ask Shelley-Bio # +# _ .----. # +# (_ \/ \_, # +# `uu----uu' # +# Basic commands: # +# shelley-bio find # +# shelley-bio search "" # +# shelley-bio versions # +# shelley-bio build # +# shelley-bio interactive # +################################################################## +``` + +Run the probe command to confirm CVMFS is connected: + +```bash +cvmfs_config probe +``` + +You should see `OK` for each repository. If a repository shows `Failed!`, wait a moment and +try again. Contact [Australian BioCommons support](https://www.biocommons.org.au/helpdesk) +if the problem persists. + +--- + +## Step 2 — Find and install tools with Bio-Shelley {#bio-shelley} + +**Bio-Shelley** is BioShell's built-in assistant for finding and installing tools. It indexes +over 700 tools and 118,000 container versions from the BioContainers catalogue so you do not +need to know container paths or SHPC commands. + +You can use Bio-Shelley in two ways: + +**Directly from the command line:** + +```bash +shelley-bio find <tool> +shelley-bio search "<keyword>" +shelley-bio versions <tool> +shelley-bio build <tool>/<version> +``` + +**Or in interactive mode:** + +```bash +shelley-bio interactive +``` + +| Command | What it does | Example | +|---------|-------------|---------| +| `find <tool>` | Look up a specific tool by name | `shelley-bio find fastqc` | +| `search "<keyword>"` | Search by keyword or function | `shelley-bio search "quality control"` | +| `versions <tool>` | List all available versions | `shelley-bio versions samtools` | +| `build <tool>/<version>` | Install the tool as a loadable module | `shelley-bio build samtools/1.21` | +| `interactive` | Launch Bio-Shelley in interactive mode | `shelley-bio interactive` | + +The `build` command handles the full installation automatically — it finds the correct +container in CVMFS, generates the module file, and makes the tool ready to load. 
+ +![](images/bioshell/SCREENSHOT_NEEDED_bioshelley_build.png) + +*Fig 3. Using `shelley-bio build samtools/1.21` to install a tool module.* + +> **Tip:** Use `shelley-bio search` when you know what you want to do but not the tool name. +> For example, `shelley-bio search "variant calling"` returns tools relevant to that task. + +--- + +## Step 3 — Load a tool {#load-tool} + +Once Bio-Shelley has built a module, load it with: + +```bash +module use ~/shpc/modules +module load <tool>/<version> +``` + +Verify the tool is ready: + +```bash +<tool> --version +``` + +> **Note:** Run `module use ~/shpc/modules` each session, or add it to your `~/.bashrc` +> to make it permanent. + +--- + +## Reference datasets {#reference-data} + +CVMFS also provides access to reference genome builds and pre-built indexes from the Galaxy +Project. You can use these directly in your analyses by referencing their absolute file path. + +List the available reference data: + +```bash +ls /cvmfs/data.galaxyproject.org/byhand/ +ls /cvmfs/data.galaxyproject.org/managed/ +``` + +| Directory | Contents | +|-----------|---------| +| `/managed` | Datasets generated with Galaxy Data Manager tools. Organised by index type, then genome build. | +| `/byhand` | Older, manually curated datasets. Organised by genome build, then index type. | + +To use a reference file in your analysis, pass its absolute path directly to your tool. +For example, the human CHM13 T2T v2.0 FASTA file is at: + +``` +/cvmfs/data.galaxyproject.org/byhand/CHM13_T2T_v2.0/seq/CHM13_T2T_v2.0.fa +``` + +> **Note:** The reference datasets available through CVMFS are those hosted by the Galaxy +> Project and may not be comprehensive. This is not a replacement for your institution's +> primary data access methods. + +--- + +## Advanced: manual SHPC commands {#shpc-manual} + +If you need to install a tool version that Bio-Shelley cannot find, or you prefer to work +directly with SHPC, use the steps below. 
+ +> **Tip:** Bio-Shelley handles most cases, including older tool versions not in the default +> registry. Try `shelley-bio versions <tool>` before attempting manual installation. + +**Load SHPC:** + +```bash +module load shpc +``` + +**Search for a tool and view versions:** + +```bash +shpc show -f <tool> +shpc show quay.io/biocontainers/<tool> +``` + +**Install a module directly from CVMFS:** + +```bash +shpc install quay.io/biocontainers/<tool>:<tag> \ + /cvmfs/singularity.galaxyproject.org/all/<tool>:<tag> \ + --keep-path +``` + +**Make the module available and load it:** + +```bash +module use ~/shpc/modules +module load quay.io/biocontainers/<tool>/<tag> +``` + +For further documentation see the +[SHPC user guide](https://singularity-hpc.readthedocs.io/en/latest/getting_started/user-guide.html). diff --git a/pages/using-bioshell.md b/pages/using-bioshell.md index 7d873f7..6424ff9 100644 --- a/pages/using-bioshell.md +++ b/pages/using-bioshell.md @@ -1,361 +1,16 @@ --- title: Using BioShell -description: How to choose a flavour, load tools, use interactive environments, and get - help from Bio-Shelley in BioShell. +description: Overview of BioShell's core features — tools, interactive environments, reference data, and workflow integration. --- -## Overview - Once you are connected to your BioShell environment, you have access to a curated library of bioinformatics tools, interactive coding environments, and an AI-assisted support agent. -This page walks you through each feature. - ---- - -## Choosing a flavour {#flavours} - -A flavour is the combination of virtual CPUs (vCPUs) and memory (RAM) allocated to your -BioShell environment. BioShell is a shared national resource, so you are encouraged to -request a flavour that closely matches your actual needs — this keeps capacity available for -everyone. - -Estimating your requirements can be hard at the start of a project. Use the guidance below -to make a reasonable starting choice. You can always request a larger flavour later if your -analysis grows. 
- -### Flavour types at a glance - -| Type | Characteristics | Best for | -|------|----------------|----------| -| **t3** | Low memory relative to CPUs | Testing, small jobs, getting started | -| **m3** | Balanced CPU and memory | Most general-purpose workloads | -| **c3** | CPU-optimised (same memory ratio as m3) | Compute-heavy tasks such as alignment and assembly | -| **r3** | High memory per CPU | Memory-intensive analysis such as variant calling or large data processing | - -### Quick sizing guide - -| Task type | Suggested resources | -|-----------|-------------------| -| Light preprocessing (QC, trimming, filtering) | 1–4 vCPUs, 2–8 GB RAM | -| Alignment and assembly (e.g. `bwa`, `STAR`, `SPAdes`) | 8–16 vCPUs, 16–32 GB RAM | -| Memory-intensive analysis (variant calling, genome-wide statistics) | 16–32+ vCPUs, 32–128 GB RAM | -| Interactive analysis and visualisation (JupyterLab, RStudio) | 2–4 vCPUs, 8–16 GB RAM | - -### Worked examples - -#### Light processing - -John is starting a project analysing drought-resistant genes from 20 crop samples (~7 GB -raw sequencing data per sample). His pipeline includes quality control with `FastQC`, -adapter trimming, alignment to a reference genome, annotation, and phylogenetic tree -construction. - -The most resource-intensive steps require moderate CPU and memory. Because John is analysing -a subset of genes rather than whole genomes, each run is relatively small: - -- 2–4 vCPUs -- Up to 10 GB RAM - -A balanced `m3` flavour (4 vCPUs / 8 GB RAM) is a good starting point. - -#### Memory-intensive processing - -Georgie is running `GATK4` best-practice variant calling on 15 human exome samples. Each -sample produces BAM files of ~15 GB, with similar-sized temporary files during processing. -Total storage is approximately 1 TB. `GATK4` tools benefit from both high memory and -multiple CPU cores: - -- 8 vCPUs -- 32 GB RAM - -An `r3` equivalent (8 vCPUs / 32 GB RAM) is suitable. 
If processing ~30 exomes or running -multiple jobs in parallel, consider doubling the CPU and memory allocation. - -### How to choose if you are unsure - -1. **Check software documentation** — most bioinformatics tools publish minimum and - recommended system requirements. -2. **Start small and scale up** — begin with a smaller flavour. If jobs run slowly with - CPU usage consistently near 100%, you need more vCPUs. If jobs fail with memory errors, - you need more RAM. -3. **Review previous runs** — if you have run similar analyses before, check your peak CPU - and RAM usage from those logs to guide your estimate. - -> **Note:** For interactive work in JupyterLab or RStudio, 2–4 vCPUs and 8–16 GB RAM is -> sufficient for most datasets. Increase memory if you are loading large files directly -> into your session. - ---- - -## Reproducible tools {#tools} - -BioShell gives you access to thousands of bioinformatics tools through -[CernVM-FS (CVMFS)](https://cvmfs.readthedocs.io/en/stable/), a read-only shared file system -that delivers containerised tools and reference datasets on demand. Rather than downloading -or installing software yourself, you load tools as modules — they are pulled from CVMFS only -when you need them. - -Through CVMFS, BioShell connects to three repositories: - -| Repository | Contents | -|-----------|----------| -| `singularity.galaxyproject.org` | Containerised tools from [BioContainers](https://biocontainers.pro/) | -| `data.galaxyproject.org` | Reference genome builds and pre-built indexes from the Galaxy Project | -| `data.biocommons.aarnet.edu.au` | [AUTHOR TO SUPPLY — confirm final name and description] | - -> **Note:** CVMFS does not download everything in advance. It fetches files only when you -> access them, so the first time you use a tool or dataset it may take a moment to load. -> Subsequent access is faster because the files are cached locally. 
- -### Step 1 — Verify your CVMFS connection - -When you first connect to BioShell you will see the following welcome message: - -``` -################################################################## -# # -# Welcome to BioShell! # -# # -# A range of commonly used bioinformatics software is available # -# via CVMFS. # -# # -# To make the software folders visible, run: # -# cvmfs_config probe # -# # -# If you need help finding and installing software, # -# ask Shelley-Bio # -# _ .----. # -# (_ \/ \_, # -# `uu----uu' # -# Basic commands: # -# shelley-bio find # -# shelley-bio search "" # -# shelley-bio versions # -# shelley-bio build # -# shelley-bio interactive # -################################################################## -``` - -Run the probe command to confirm CVMFS is connected: - -```bash -cvmfs_config probe -``` - -You should see `OK` for each repository. If a repository shows `Failed!`, wait a moment and -try again. Contact [Australian BioCommons support](https://www.biocommons.org.au/helpdesk) -if the problem persists. - -### Step 2 — Find and install tools with Bio-Shelley {#bio-shelley} - -**Bio-Shelley** is BioShell's built-in assistant for finding and installing tools. It indexes -over 700 tools and 118,000 container versions from the BioContainers catalogue so you do not -need to know container paths or SHPC commands. 
- -You can use Bio-Shelley in two ways: - -**Directly from the command line:** - -```bash -shelley-bio find -shelley-bio search "" -shelley-bio versions -shelley-bio build -``` - -**Or in interactive mode:** - -```bash -shelley-bio interactive -``` - -| Command | What it does | Example | -|---------|-------------|---------| -| `find ` | Look up a specific tool by name | `shelley-bio find fastqc` | -| `search ""` | Search by keyword or function | `shelley-bio search "quality control"` | -| `versions ` | List all available versions | `shelley-bio versions samtools` | -| `build ` | Install the tool as a loadable module | `shelley-bio build samtools/1.21` | -| `interactive` | Launch Bio-Shelley in interactive mode | `shelley-bio interactive` | - -The `build` command handles the full installation automatically — it finds the correct -container in CVMFS, generates the module file, and makes the tool ready to load. - -![](images/bioshell/SCREENSHOT_NEEDED_bioshelley_build.png) - -*Fig 3. Using `shelley-bio build samtools/1.21` to install a tool module.* - -> **Tip:** Use `shelley-bio search` when you know what you want to do but not the tool name. -> For example, `shelley-bio search "variant calling"` returns tools relevant to that task. - -### Step 3 — Load a tool - -Once Bio-Shelley has built a module, load it with: - -```bash -module use ~/shpc/modules -module load / -``` - -Verify the tool is ready: - -```bash - --version -``` - -> **Note:** Run `module use ~/shpc/modules` each session, or add it to your `~/.bashrc` -> to make it permanent. - ---- - -## Reference datasets {#reference-data} - -CVMFS also provides access to reference genome builds and pre-built indexes from the Galaxy -Project. You can use these directly in your analyses by referencing their absolute file path. 
- -List the available reference data: - -```bash -ls /cvmfs/data.galaxyproject.org/byhand/ -ls /cvmfs/data.galaxyproject.org/managed/ -``` - -| Directory | Contents | -|-----------|---------| -| `/managed` | Datasets generated with Galaxy Data Manager tools. Organised by index type, then genome build. | -| `/byhand` | Older, manually curated datasets. Organised by genome build, then index type. | - -To use a reference file in your analysis, pass its absolute path directly to your tool. -For example, the human CHM13 T2T v2.0 FASTA file is at: - -``` -/cvmfs/data.galaxyproject.org/byhand/CHM13_T2T_v2.0/seq/CHM13_T2T_v2.0.fa -``` - -> **Note:** The reference datasets available through CVMFS are those hosted by the Galaxy -> Project and may not be comprehensive. This is not a replacement for your institution's -> primary data access methods. - ---- - -## Interactive environments {#interactive} - -BioShell supports two browser-based interactive environments for notebook and script-based -work. Both run on your BioShell instance and are accessible through your browser once you -have connected via SSH. - -> **Important:** You must have an active SSH connection to your BioShell instance before -> opening either environment in your browser. See [Connecting to BioShell](/access#connecting). - -### JupyterLab - -JupyterLab is a browser-based environment for writing and running Python, R, and shell -notebooks. It is well suited to exploratory analysis, visualisation, and combining code -with documentation. - -Open your browser and go to: - -``` -http://:8888 -``` - -![](images/bioshell/SCREENSHOT_NEEDED_jupyterlab.png) - -*Fig 4. JupyterLab open in a browser connected to a BioShell instance.* - -### RStudio - -RStudio is a browser-based environment for R-based analysis. If you are familiar with the -RStudio desktop application, the browser version works in exactly the same way. 
- -Open your browser and go to: - -``` -http://:8787 -``` - -![](images/bioshell/SCREENSHOT_NEEDED_rstudio.png) - -*Fig 5. RStudio open in a browser connected to a BioShell instance.* - -> **Tip:** Your SSH terminal and browser environments connect to the same BioShell instance. -> Any tool you have loaded with `module load` in your terminal is also available inside -> JupyterLab and RStudio. - -> **Note:** If you cannot reach JupyterLab or RStudio in your browser, check that your SSH -> connection is still active. If you are connecting from outside your institution's network, -> you may need an SSH tunnel. Contact your local IT support for help with this. - ---- - -## Using Nextflow with CVMFS {#nextflow} - -If you are running a Nextflow workflow, you can direct processes to use containers already -in CVMFS rather than downloading them. Create a config file that overrides the container for -the relevant process: - -```groovy -process { - withName: 'FASTQC' { - container = 'file:///cvmfs/singularity.galaxyproject.org/all/fastqc:0.12.1--hdfd78af_0' - } -} -``` - -Or use an installed SHPC module by name: - -```groovy -process { - withName: 'FASTQC' { - module = 'fastqc/0.11.9' - } -} -``` - -Run your workflow with the config override: - -```bash -nextflow run main.nf -profile singularity -config cvmfs_path.config -``` - -> **Note:** The `module` value must match exactly what appears in `module avail`. - ---- - -## Advanced: manual SHPC commands {#shpc-manual} - -If you need to install a tool version that Bio-Shelley cannot find, or you prefer to work -directly with SHPC, use the steps below. - -> **Tip:** Bio-Shelley handles most cases, including older tool versions not in the default -> registry. Try `shelley-bio versions ` before attempting manual installation. 
- -**Load SHPC:** - -```bash -module load shpc -``` - -**Search for a tool and view versions:** - -```bash -shpc show -f -shpc show quay.io/biocontainers/ -``` - -**Install a module directly from CVMFS:** - -```bash -shpc install quay.io/biocontainers/: \ - /cvmfs/singularity.galaxyproject.org/all/: \ - --keep-path -``` - -**Make the module available and load it:** -```bash -module use ~/shpc/modules -module load quay.io/biocontainers// -``` +Use the sections below to find what you need. -For further documentation see the -[SHPC user guide](https://singularity-hpc.readthedocs.io/en/latest/getting_started/user-guide.html). +| I want to… | Go to | +|-----------|-------| +| Choose the right VM size for my analysis | [Choosing a flavour](/flavours) | +| Find, install, and load a bioinformatics tool | [Tools and reference data](/tools) | +| Work in a Jupyter or RStudio notebook | [Interactive environments](/interactive) | +| Run a Nextflow workflow using CVMFS containers | [Using Nextflow with CVMFS](/nextflow) | From f5bf95ba79ddaf5ff14448aaa8306d12649cb7a8 Mon Sep 17 00:00:00 2001 From: Mitchob Date: Tue, 16 Jun 2026 16:55:24 +1000 Subject: [PATCH 2/3] instructions for generating an ssh key --- _data/sidebars/main.yml | 7 +++ pages/access.md | 108 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/_data/sidebars/main.yml b/_data/sidebars/main.yml index 0c719f5..c84ed55 100644 --- a/_data/sidebars/main.yml +++ b/_data/sidebars/main.yml @@ -12,6 +12,13 @@ subitems: url: /access#duration - title: Connecting to BioShell url: /access#connecting + subitems: + - title: Generate an SSH key + url: /access#ssh-key + - title: Submit your public key + url: /access#submit-key + - title: Connect + url: /access#connect - title: Using BioShell url: /using-bioshell subitems: diff --git a/pages/access.md b/pages/access.md index 5b12712..e6edac5 100644 --- a/pages/access.md +++ b/pages/access.md @@ -77,15 +77,113 @@ application. 
## Connecting to BioShell {#connecting} -Once your environment is provisioned you will receive connection details by email. Connect -via SSH from your local terminal: +Once your environment is provisioned you will receive connection details by email. + +### Step 1 — Generate an SSH key {#ssh-key} + +BioShell uses SSH key authentication. If you do not already have an SSH key pair, generate +one on your local machine before connecting. + +**Quick start** (works on macOS, Linux, and Windows with OpenSSH): + +```bash +ssh-keygen -t ed25519 -C "user@example.com" -f ~/.ssh/bioshell_key +``` + +Replace `user@example.com` with your own email address. Accept the default prompts, or set +a passphrase when asked (recommended). + +**Complete setup — macOS:** + +```bash +# Create .ssh directory if it doesn't exist +mkdir -p ~/.ssh && chmod 700 ~/.ssh + +# Generate the key +ssh-keygen -t ed25519 -C "user@example.com" -f ~/.ssh/bioshell_key + +# Start ssh-agent and load the key into the macOS Keychain +eval "$(ssh-agent -s)" +ssh-add --apple-use-keychain ~/.ssh/bioshell_key + +# Set correct permissions on the private key +chmod 600 ~/.ssh/bioshell_key + +# Copy the public key to your clipboard +pbcopy < ~/.ssh/bioshell_key.pub +``` + +**Complete setup — Linux:** + +```bash +# Create .ssh directory if it doesn't exist +mkdir -p ~/.ssh && chmod 700 ~/.ssh + +# Generate the key +ssh-keygen -t ed25519 -C "user@example.com" -f ~/.ssh/bioshell_key + +# Start ssh-agent and add the key +eval "$(ssh-agent -s)" +ssh-add ~/.ssh/bioshell_key + +# Set correct permissions on the private key +chmod 600 ~/.ssh/bioshell_key + +# Copy the public key to your clipboard (requires xclip) +xclip -selection clipboard < ~/.ssh/bioshell_key.pub +``` + +**Complete setup — Windows (PowerShell with OpenSSH):** + +```powershell +# Generate the key +ssh-keygen -t ed25519 -C "user@example.com" -f "$env:USERPROFILE\.ssh\bioshell_key" + +# Start ssh-agent service and add the key +Start-Service ssh-agent +ssh-add 
"$env:USERPROFILE\.ssh\bioshell_key" + +# Copy the public key to your clipboard +Get-Content "$env:USERPROFILE\.ssh\bioshell_key.pub" | Set-Clipboard +``` + +> **Note:** OpenSSH ships with Windows 10 (version 1809 and later) and Windows 11. If +> `ssh-keygen` is not found, go to **Settings → Apps → Optional features** and install +> **OpenSSH Client**. + +> **Tip:** Your public key is the `.pub` file — this is what you share with others or +> submit when registering for access. Never share your private key (the file without `.pub`). + +### Step 2 — Submit your public key {#submit-key} + +[AUTHOR TO SUPPLY — confirm how users submit their public key as part of the BioShell +provisioning process, e.g. via the access request form or a separate step after approval] + +### Step 3 — Connect {#connect} + +Add an entry to `~/.ssh/config` so SSH automatically uses your BioShell key without +needing to specify it each time: + +``` +Host bioshell + HostName + User + IdentityFile ~/.ssh/bioshell_key +``` + +Then connect with: + +```bash +ssh bioshell +``` + +Or connect directly without the config entry: ```bash -ssh @ +ssh -i ~/.ssh/bioshell_key @ ``` -[AUTHOR TO SUPPLY — confirm username format and whether SSH key or password authentication -is used] +[AUTHOR TO SUPPLY — confirm username format] Once connected, you can also open interactive environments directly in your browser: From 92276e22cf5844a22f010522ef053233627e0794 Mon Sep 17 00:00:00 2001 From: Mitchob Date: Tue, 16 Jun 2026 17:07:37 +1000 Subject: [PATCH 3/3] fix broken gitation update ruby --- .github/workflows/jekyll.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 456d9ba..1755a4e 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -25,7 +25,7 @@ jobs: - name: Setup Ruby uses: ruby/setup-ruby@v1.204.0 with: - ruby-version: '3.3' + ruby-version: '3.1' bundler-cache: true cache-version: 0