From 88eabb720679161e77271ed4f4fee3af9027fe8d Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Mon, 15 Jan 2024 14:47:33 +0000 Subject: [PATCH] add comments --- .github/workflows/ncbench.yml | 1 + .github/workflows/set_ncbench_config.sh | 4 ++++ .github/workflows/upload.py | 11 +++++++++++ README.md | 8 ++++++++ 4 files changed, 24 insertions(+) diff --git a/.github/workflows/ncbench.yml b/.github/workflows/ncbench.yml index c9fe1a060a..735815c14f 100644 --- a/.github/workflows/ncbench.yml +++ b/.github/workflows/ncbench.yml @@ -71,6 +71,7 @@ jobs: - name: Upload to zenodo id: upload_zenodo env: + # ATTENTION: Use SANDBOX TOKEN during development: ${{ secrets.NCBENCH_CI_TOKEN_SANDBOX }} ACCESS_TOKEN: ${{ secrets.NCBENCH_CI_TOKEN_SANDBOX }} # ${{ secrets.ZENODO_DEPOSIT }} PIPELINE_VERSION: ${{ env.PIPELINE_VERSION }} run: python .github/workflows/upload.py diff --git a/.github/workflows/set_ncbench_config.sh b/.github/workflows/set_ncbench_config.sh index 1d5fd0fdf7..be32a86d7d 100644 --- a/.github/workflows/set_ncbench_config.sh +++ b/.github/workflows/set_ncbench_config.sh @@ -1,5 +1,9 @@ #!/bin/bash +# This script updates the config.yml in the NCBench repository. +# The config file is needed to trigger the subsequent benchmarking workflow. +# For each variant caller and each input file a new entry needs to be created. + declare -A variant_callers=( ["deepvariant"]="NA12878_%sM.deepvariant.vcf.gz" ["freebayes"]="NA12878_%sM.freebayes.vcf.gz" diff --git a/.github/workflows/upload.py b/.github/workflows/upload.py index 0cbf8da5e0..58fd820479 100644 --- a/.github/workflows/upload.py +++ b/.github/workflows/upload.py @@ -2,6 +2,17 @@ import os import json +''' +This script collects all variant calling files and uploads them to Zenodo. +1. A new Zenodo entry is created +2. All files are uploaded +3. Metadata is added: Pipeline version, authors +4. Entry is published. + +ATTENTION: Use sandbox links during development! 
They are set in each affected line as a comment. + If you need to use the production Zenodo links, turn off publishing (see bottom). +''' + headers = {"Content-Type": "application/json"} access_token = os.environ["ACCESS_TOKEN"] params = {"access_token": access_token} diff --git a/README.md b/README.md index 6e4688e1b1..fb30a34f7f 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,14 @@ To see the results of an example test run with a full size dataset refer to the For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/sarek/output). +## Benchmarking + +On each release, the pipeline is run on 3 full size tests: + +- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium +- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle (NA12878) dataset +- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available [here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). The results are uploaded to Zenodo, evaluated against a truth dataset, and results are made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#). + ## Credits Sarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken).