Merge pull request #766 from erikrikarddaniel/fix-unite-urls

Fix unite urls
nf-core · Jul 24, 2024 · e057eba · e057eba
2 parents e66c9d3 + 8e9b28d
commit e057eba
Show file tree

Hide file tree

Showing 9 changed files with 193 additions and 97 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,14 +8,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Added`
 
 - [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy`
+- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as options for `--sintax_ref_taxonomy`
 
 ### `Changed`
 
 - [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation
+- [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from `QIIME2_ANCOM` to avoid collisions
+- [#766](https://github.com/nf-core/ampliseq/pull/766) - Removed Unite databases from the options for `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers
 
 ### `Fixed`
 
 - [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0
+- [#766](https://github.com/nf-core/ampliseq/pull/766) - Fixed broken urls for Unite databases (issue [#764](https://github.com/nf-core/ampliseq/issues/764))
 
 ### `Dependencies`
 

diff --git a/bin/taxref_reformat_sintax.sh → bin/taxref_reformat_sintax_fasta.sh b/bin/taxref_reformat_sintax.sh → bin/taxref_reformat_sintax_fasta.sh
@@ -5,4 +5,3 @@
 # Just rename the preformatted file
 # Assumes only one (gzipped) file
 mv * sintaxdb.fa.gz
-
diff --git a/bin/taxref_reformat_sintax_tar.sh b/bin/taxref_reformat_sintax_tar.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+# Handles preformatted database tar files suitable for sintax.
+# Assumes a single *.tgz archive in the working directory (tar would treat a second one as a member name).
+# This turned out to be a MISTAKE and is NOT USED, but I'm keeping the file for a while anyway.
+
+# Find the fasta member without _dev in its name; stop early if there is none, then extract only that member
+f=$(tar tzf ./*.tgz | grep fasta | grep -v '_dev')
+[ -n "$f" ] || { echo "No fasta member without '_dev' found in the archive" >&2; exit 1; }
+tar xzf ./*.tgz "$f"
+# Change the name and gzip; the quotes keep a member path containing whitespace as one argument
+mv -- "$f" sintaxdb.fa
+gzip sintaxdb.fa
diff --git a/conf/ref_databases.config b/conf/ref_databases.config
diff --git a/conf/test_its_dada_taxonomy.config b/conf/test_its_dada_taxonomy.config
@@ -0,0 +1,50 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/ampliseq -profile test_its_dada_taxonomy,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'Test ITS DADA2 taxonomy profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function when using DADA2 with the UNITE fungi database for ITS taxonomy assignment'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '12.GB'
+    max_time   = '6.h'
+
+    // Input data: primers, samplesheet and metadata from the shared test-data location
+    FW_primer = "CTTGGTCATTTAGAGGAAGTAA"
+    RV_primer = "TCCTGAGGGAAACTTCG"
+    input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv"
+    metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv"
+    pacbio = true
+    max_ee = 12
+    cut_its = "its2"
+
+    skip_dada_taxonomy = false // keep DADA2 taxonomic classification switched on; it is what this profile tests
+    dada_ref_taxonomy = "unite-fungi" // no version suffix given, only the database name
+
+    // remove low abundance ASVs to reduce runtime of downstream processes
+    min_samples = 2
+    min_frequency = 10
+
+    // produce average barplots for these metadata columns
+    metadata_category_barplot = "var2,var3"
+
+    // restrict ANCOM analysis to higher taxonomic levels
+    tax_agglom_max = 4
+    ancom = true
+
+    sbdiexport = true
+
+    qiime_adonis_formula = "var2"
+
+    diversity_rarefaction_depth = 500
+}
diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf
@@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX {
         --to-tsv
 
     if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) -lt 2 ]; then
-        echo ${taxlevel} > ancom/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt
+        echo ${taxlevel} > ancom/\"WARNING ${table.baseName} Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt
     else
         qiime composition add-pseudocount \\
                 --i-table lvl${taxlevel}-${table} \\

diff --git a/nextflow.config b/nextflow.config
@@ -284,21 +284,22 @@ profiles {
         executor.cpus           = 4
         executor.memory         = 8.GB
     }
-    test               { includeConfig 'conf/test.config'               }
-    test_single        { includeConfig 'conf/test_single.config'        }
-    test_multi         { includeConfig 'conf/test_multi.config'         }
-    test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' }
-    test_pacbio_its    { includeConfig 'conf/test_pacbio_its.config'    }
-    test_iontorrent    { includeConfig 'conf/test_iontorrent.config'    }
-    test_fasta         { includeConfig 'conf/test_fasta.config'         }
-    test_failed        { includeConfig 'conf/test_failed.config'        }
-    test_full          { includeConfig 'conf/test_full.config'          }
-    test_reftaxcustom  { includeConfig 'conf/test_reftaxcustom.config'  }
-    test_qiimecustom   { includeConfig 'conf/test_qiimecustom.config'   }
-    test_novaseq       { includeConfig 'conf/test_novaseq.config'       }
-    test_pplace        { includeConfig 'conf/test_pplace.config'        }
-    test_sintax        { includeConfig 'conf/test_sintax.config'        }
-    test_multiregion   { includeConfig 'conf/test_multiregion.config'   }
+    test                   { includeConfig 'conf/test.config'                   }
+    test_single            { includeConfig 'conf/test_single.config'            }
+    test_multi             { includeConfig 'conf/test_multi.config'             }
+    test_doubleprimers     { includeConfig 'conf/test_doubleprimers.config'     }
+    test_pacbio_its        { includeConfig 'conf/test_pacbio_its.config'        }
+    test_iontorrent        { includeConfig 'conf/test_iontorrent.config'        }
+    test_fasta             { includeConfig 'conf/test_fasta.config'             }
+    test_failed            { includeConfig 'conf/test_failed.config'            }
+    test_full              { includeConfig 'conf/test_full.config'              }
+    test_reftaxcustom      { includeConfig 'conf/test_reftaxcustom.config'      }
+    test_qiimecustom       { includeConfig 'conf/test_qiimecustom.config'       }
+    test_novaseq           { includeConfig 'conf/test_novaseq.config'           }
+    test_pplace            { includeConfig 'conf/test_pplace.config'            }
+    test_sintax            { includeConfig 'conf/test_sintax.config'            }
+    test_its_dada_taxonomy { includeConfig 'conf/test_its_dada_taxonomy.config' }
+    test_multiregion       { includeConfig 'conf/test_multiregion.config'       }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -381,13 +381,13 @@
                         "silva=132",
                         "silva=138",
                         "unite-alleuk",
-                        "unite-alleuk=8.2",
-                        "unite-alleuk=8.3",
                         "unite-alleuk=9.0",
+                        "unite-alleuk=8.3",
+                        "unite-alleuk=8.2",
                         "unite-fungi",
-                        "unite-fungi=8.2",
-                        "unite-fungi=8.3",
                         "unite-fungi=9.0",
+                        "unite-fungi=8.3",
+                        "unite-fungi=8.2",
                         "zehr-nifh",
                         "zehr-nifh=2.5.0"
                     ]
@@ -454,20 +454,7 @@
                     "type": "string",
                     "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": [
-                        "silva=138",
-                        "silva",
-                        "unite-fungi=8.3",
-                        "unite-fungi=8.2",
-                        "unite-fungi",
-                        "unite-alleuk=9.0",
-                        "unite-alleuk=8.3",
-                        "unite-alleuk=8.2",
-                        "unite-alleuk",
-                        "greengenes85",
-                        "greengenes2",
-                        "greengenes2=2022.10"
-                    ]
+                    "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
@@ -520,14 +507,16 @@
                     "enum": [
                         "coidb",
                         "coidb=221216",
+                        "unite-fungi",
+                        "unite-fungi=10.0",
                         "unite-fungi=9.0",
                         "unite-fungi=8.3",
                         "unite-fungi=8.2",
-                        "unite-fungi",
+                        "unite-alleuk",
+                        "unite-alleuk=10.0",
                         "unite-alleuk=9.0",
                         "unite-alleuk=8.3",
-                        "unite-alleuk=8.2",
-                        "unite-alleuk"
+                        "unite-alleuk=8.2"
                     ]
                 },
                 "addsh": {

diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
@@ -245,8 +245,8 @@ def validateInputParameters() {
         "rdp","rdp=18",
         "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1",
         "silva","silva=138","silva=132",
-        "unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
-        "unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
+        "unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
+        "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
     ]
     if (params.sbdiexport){
         if (params.sintax_ref_taxonomy ) {