Skip to content

Commit

Permalink
Merge pull request #766 from erikrikarddaniel/fix-unite-urls
Browse files Browse the repository at this point in the history
Fix unite urls
  • Loading branch information
erikrikarddaniel committed Jul 24, 2024
2 parents e66c9d3 + 8e9b28d commit e057eba
Show file tree
Hide file tree
Showing 9 changed files with 193 additions and 97 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy`
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as options for `--sintax_ref_taxonomy`

### `Changed`

- [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from `QIIME2_ANCOM_TAX` to avoid collisions
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Disabled Unite databases for `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers

### `Fixed`

- [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Fixed broken URLs for Unite databases (issue [#764](https://github.com/nf-core/ampliseq/issues/764))

### `Dependencies`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@
# Just rename the preformatted file
# Assumes only one (gzipped) file
mv * sintaxdb.fa.gz

13 changes: 13 additions & 0 deletions bin/taxref_reformat_sintax_tar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh

# Handles preformatted database tar files suitable for sintax
#
# This turned out to be a MISTAKE and is NOT USED, but I'm keeping the file for a while anyway.
#
# Assumes exactly one *.tgz archive in the working directory.
# Extracts the single archive member whose name contains "fasta" but not
# "_dev", and leaves it in the working directory as sintaxdb.fa.gz.

# Abort on the first failing command instead of carrying on with partial output
set -e

# Find the fasta file without _dev in its name.
# `|| true` keeps `set -e` from exiting silently when grep matches nothing;
# the empty result is reported explicitly below.
f=$(tar tfz *.tgz | grep fasta | grep -v '_dev' || true)

# Without a member name, `tar xzf` would extract the whole archive
if [ -z "$f" ]; then
    echo "ERROR: no fasta file without '_dev' in its name found in the tar archive" >&2
    exit 1
fi

# More than one match cannot be renamed to a single output file
if [ $(printf '%s\n' "$f" | wc -l) -ne 1 ]; then
    echo "ERROR: expected exactly one fasta file without '_dev' in its name, found:" >&2
    printf '%s\n' "$f" >&2
    exit 1
fi

# Extract only that member; quoted so names with whitespace survive
tar xzf *.tgz "$f"

# Change the name and gzip
mv "$f" sintaxdb.fa
gzip sintaxdb.fa
154 changes: 97 additions & 57 deletions conf/ref_databases.config

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions conf/test_its_dada_taxonomy.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/ampliseq -profile test_its_dada_taxonomy,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

params {
    // Profile name and description match the `test_its_dada_taxonomy` profile
    // that includes this file (they were copied from the sintax test profile)
    config_profile_name = 'Test ITS DADA2 taxonomy profile'
    config_profile_description = 'Minimal test dataset to check pipeline function when using DADA2 with a UNITE database for ITS taxonomy assignment'

    // Limit resources so that this can run on GitHub Actions
    max_cpus = 2
    max_memory = '12.GB'
    max_time = '6.h'

    // Input data
    FW_primer = "CTTGGTCATTTAGAGGAAGTAA"
    RV_primer = "TCCTGAGGGAAACTTCG"
    input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv"
    metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv"
    pacbio = true
    max_ee = 12
    cut_its = "its2"

    // Taxonomy assignment with the UNITE fungi database via --dada_ref_taxonomy
    skip_dada_taxonomy = false
    dada_ref_taxonomy = "unite-fungi"

    //this is to remove low abundance ASVs to reduce runtime of downstream processes
    min_samples = 2
    min_frequency = 10

    //produce average barplots
    metadata_category_barplot = "var2,var3"

    //restrict ANCOM analysis to higher taxonomic levels
    tax_agglom_max = 4
    ancom = true

    sbdiexport = true

    qiime_adonis_formula = "var2"

    diversity_rarefaction_depth = 500
}
2 changes: 1 addition & 1 deletion modules/local/qiime2_ancom_tax.nf
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX {
--to-tsv
if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) -lt 2 ]; then
echo ${taxlevel} > ancom/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt
echo ${taxlevel} > ancom/\"WARNING ${table.baseName} Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt
else
qiime composition add-pseudocount \\
--i-table lvl${taxlevel}-${table} \\
Expand Down
31 changes: 16 additions & 15 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -284,21 +284,22 @@ profiles {
executor.cpus = 4
executor.memory = 8.GB
}
test { includeConfig 'conf/test.config' }
test_single { includeConfig 'conf/test_single.config' }
test_multi { includeConfig 'conf/test_multi.config' }
test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' }
test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' }
test_iontorrent { includeConfig 'conf/test_iontorrent.config' }
test_fasta { includeConfig 'conf/test_fasta.config' }
test_failed { includeConfig 'conf/test_failed.config' }
test_full { includeConfig 'conf/test_full.config' }
test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' }
test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' }
test_novaseq { includeConfig 'conf/test_novaseq.config' }
test_pplace { includeConfig 'conf/test_pplace.config' }
test_sintax { includeConfig 'conf/test_sintax.config' }
test_multiregion { includeConfig 'conf/test_multiregion.config' }
test { includeConfig 'conf/test.config' }
test_single { includeConfig 'conf/test_single.config' }
test_multi { includeConfig 'conf/test_multi.config' }
test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' }
test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' }
test_iontorrent { includeConfig 'conf/test_iontorrent.config' }
test_fasta { includeConfig 'conf/test_fasta.config' }
test_failed { includeConfig 'conf/test_failed.config' }
test_full { includeConfig 'conf/test_full.config' }
test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' }
test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' }
test_novaseq { includeConfig 'conf/test_novaseq.config' }
test_pplace { includeConfig 'conf/test_pplace.config' }
test_sintax { includeConfig 'conf/test_sintax.config' }
test_its_dada_taxonomy { includeConfig 'conf/test_its_dada_taxonomy.config' }
test_multiregion { includeConfig 'conf/test_multiregion.config' }
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
Expand Down
31 changes: 10 additions & 21 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,13 @@
"silva=132",
"silva=138",
"unite-alleuk",
"unite-alleuk=8.2",
"unite-alleuk=8.3",
"unite-alleuk=9.0",
"unite-alleuk=8.3",
"unite-alleuk=8.2",
"unite-fungi",
"unite-fungi=8.2",
"unite-fungi=8.3",
"unite-fungi=9.0",
"unite-fungi=8.3",
"unite-fungi=8.2",
"zehr-nifh",
"zehr-nifh=2.5.0"
]
Expand Down Expand Up @@ -454,20 +454,7 @@
"type": "string",
"help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
"description": "Name of supported database, and optionally also version number",
"enum": [
"silva=138",
"silva",
"unite-fungi=8.3",
"unite-fungi=8.2",
"unite-fungi",
"unite-alleuk=9.0",
"unite-alleuk=8.3",
"unite-alleuk=8.2",
"unite-alleuk",
"greengenes85",
"greengenes2",
"greengenes2=2022.10"
]
"enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
},
"qiime_ref_tax_custom": {
"type": "string",
Expand Down Expand Up @@ -520,14 +507,16 @@
"enum": [
"coidb",
"coidb=221216",
"unite-fungi",
"unite-fungi=10.0",
"unite-fungi=9.0",
"unite-fungi=8.3",
"unite-fungi=8.2",
"unite-fungi",
"unite-alleuk",
"unite-alleuk=10.0",
"unite-alleuk=9.0",
"unite-alleuk=8.3",
"unite-alleuk=8.2",
"unite-alleuk"
"unite-alleuk=8.2"
]
},
"addsh": {
Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ def validateInputParameters() {
"rdp","rdp=18",
"sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1",
"silva","silva=138","silva=132",
"unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
"unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
"unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
"unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
]
if (params.sbdiexport){
if (params.sintax_ref_taxonomy ) {
Expand Down

0 comments on commit e057eba

Please sign in to comment.