From 88eabb720679161e77271ed4f4fee3af9027fe8d Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Mon, 15 Jan 2024 14:47:33 +0000
Subject: [PATCH] add comments

---
 .github/workflows/ncbench.yml           |  1 +
 .github/workflows/set_ncbench_config.sh |  4 ++++
 .github/workflows/upload.py             | 11 +++++++++++
 README.md                               |  8 ++++++++
 4 files changed, 24 insertions(+)

diff --git a/.github/workflows/ncbench.yml b/.github/workflows/ncbench.yml
index c9fe1a060a..735815c14f 100644
--- a/.github/workflows/ncbench.yml
+++ b/.github/workflows/ncbench.yml
@@ -71,6 +71,7 @@ jobs:
       - name: Upload to zenodo
         id: upload_zenodo
         env:
+          # ATTENTION: Use SANDBOX TOKEN during development: ${{ secrets.NCBENCH_CI_TOKEN_SANDBOX }}
           ACCESS_TOKEN: ${{ secrets.NCBENCH_CI_TOKEN_SANDBOX }} # ${{ secrets.ZENODO_DEPOSIT }}
           PIPELINE_VERSION: ${{ env.PIPELINE_VERSION }}
         run: python .github/workflows/upload.py
diff --git a/.github/workflows/set_ncbench_config.sh b/.github/workflows/set_ncbench_config.sh
index 1d5fd0fdf7..be32a86d7d 100644
--- a/.github/workflows/set_ncbench_config.sh
+++ b/.github/workflows/set_ncbench_config.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+# This script updates the config.yml in the NCBench repository.
+# The config file is needed to trigger the subsequent benchmarking workflow.
+# For each variant caller and each input file a new entry needs to be created.
+
 declare -A variant_callers=(
     ["deepvariant"]="NA12878_%sM.deepvariant.vcf.gz"
     ["freebayes"]="NA12878_%sM.freebayes.vcf.gz"
diff --git a/.github/workflows/upload.py b/.github/workflows/upload.py
index 0cbf8da5e0..58fd820479 100644
--- a/.github/workflows/upload.py
+++ b/.github/workflows/upload.py
@@ -2,6 +2,17 @@
 import os
 import json
 
+'''
+This script collects all variant calling files and uploads them to Zenodo.
+1. A new Zenodo entry is created
+2. All files are uploaded
+3. Metadata is added: pipeline version, authors
+4. Entry is published.
+
+ATTENTION: Use sandbox links during development! They are given as a comment on each affected line.
+            If you need to use the production Zenodo links, turn off publishing (see bottom).
+'''
+
 headers = {"Content-Type": "application/json"}
 access_token = os.environ["ACCESS_TOKEN"]
 params = {"access_token": access_token}
diff --git a/README.md b/README.md
index 6e4688e1b1..fb30a34f7f 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,14 @@ To see the results of an example test run with a full size dataset refer to the
 For more details about the output files and reports, please refer to the
 [output documentation](https://nf-co.re/sarek/output).
 
+## Benchmarking
+
+On each release, the pipeline is run on 3 full size tests:
+
+- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium
+- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle (NA12878) dataset
+- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available [here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). The results are uploaded to Zenodo, evaluated against a truth dataset, and made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#).
+
 ## Credits
 
 Sarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken).