Skip to content

Commit

Permalink
remove import: arrangements
Browse files Browse the repository at this point in the history
  • Loading branch information
ChangqingW committed Oct 10, 2023
1 parent c1fb81e commit fa3c306
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 10 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ biocViews: RNASeq, SingleCell, Transcriptomics, DataImport,
License: GPL (>= 2)
Encoding: UTF-8
Imports:
arrangements,
basilisk,
bambu,
Biostrings,
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(SummarizedExperiment,rowRanges)
importFrom(arrangements,combinations)
importFrom(bambu,bambu)
importFrom(bambu,prepareAnnotations)
importFrom(bambu,writeToGTF)
Expand Down
20 changes: 15 additions & 5 deletions R/find_isoform.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,14 @@ find_isoform_flames <- function(annotation, genome_fa, genome_bam, outdir, confi
}

#' GTF/GFF to FASTA conversion
#' @description convert the transcript annotation to transcriptome assembly as FASTA file.
#' @description Convert the transcript annotation to a transcriptome assembly in FASTA format. The
#' annotation is first imported as a TxDb object, which is then used to extract the transcript
#' sequences from the genome assembly.
#' @param isoform_annotation Path to the annotation file (GTF/GFF3)
#' @param genome_fa The file path to genome fasta file.
#' @param outdir Path to the directory in which the transcriptome is stored as \code{transcript_assembly.fa}.
#' @param extract_fn (optional) Function that extracts a \code{GRangesList} from the TxDb object
#' built from the annotation.
#' E.g. \code{function(txdb){GenomicFeatures::cdsBy(txdb, by="tx", use.names=TRUE)}}
#' @return Path to the outputted transcriptome assembly
#'
#' @importFrom Biostrings readDNAStringSet writeXStringSet
Expand All @@ -125,15 +129,21 @@ find_isoform_flames <- function(annotation, genome_fa, genome_bam, outdir, confi
#' cat(readChar(fasta, nchars = 1e3))
#'
#' @export
annotation_to_fasta <- function(isoform_annotation, genome_fa, outdir) {
annotation_to_fasta <- function(isoform_annotation, genome_fa, outdir, extract_fn) {
out_file <- file.path(outdir, "transcript_assembly.fa")

dna_string_set <- Biostrings::readDNAStringSet(genome_fa)
names(dna_string_set) <- gsub(" .*$", "", names(dna_string_set))
txdb <- GenomicFeatures::makeTxDbFromGFF(isoform_annotation)
if (missing(extract_fn)) {
txdb <- GenomicFeatures::makeTxDbFromGFF(isoform_annotation)
tr_string_set <- GenomicFeatures::extractTranscriptSeqs(dna_string_set, txdb,
use.names = TRUE)
} else {
extracted_grl<- extract_fn(txdb)
tr_string_set <- GenomicFeatures::extractTranscriptSeqs(dna_string_set, extracted_grl)
# additional arguments are allowed only when 'transcripts' is not a GRangesList object
}

tr_string_set <- GenomicFeatures::extractTranscriptSeqs(dna_string_set, txdb,
use.names = TRUE)
if (length(names(tr_string_set)) > length(unique(names(tr_string_set)))) {
cat("Duplicated transcript IDs present, removing ...")
tr_string_set <- tr_string_set[unique(names(tr_string_set))]
Expand Down
1 change: 0 additions & 1 deletion R/model_decay.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#' @importFrom S4Vectors split
#' @importFrom GenomicRanges strand
#' @importFrom BiocGenerics start end
#' @importFrom arrangements combinations
#'
#' @param annotation path to the GTF annotation file, or the parsed GenomicRanges
#' object.
Expand Down
9 changes: 7 additions & 2 deletions man/annotation_to_fasta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit fa3c306

Please sign in to comment.