Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
youyupei committed Oct 13, 2023
2 parents 224db35 + 6ce64d3 commit 337beeb
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 11 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ biocViews: RNASeq, SingleCell, Transcriptomics, DataImport,
License: GPL (>= 2)
Encoding: UTF-8
Imports:
arrangements,
basilisk,
bambu,
Biostrings,
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(SummarizedExperiment,rowRanges)
importFrom(arrangements,combinations)
importFrom(bambu,bambu)
importFrom(bambu,prepareAnnotations)
importFrom(bambu,writeToGTF)
Expand Down
20 changes: 15 additions & 5 deletions R/find_isoform.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,14 @@ find_isoform_flames <- function(annotation, genome_fa, genome_bam, outdir, confi
}

#' GTF/GFF to FASTA conversion
#' @description convert the transcript annotation to transcriptome assembly as FASTA file.
#' @description Convert the transcript annotation to a transcriptome assembly in FASTA format. The
#' annotation is first imported as a TxDb object, which is then used to extract the transcript
#' sequences from the genome assembly.
#' @param isoform_annotation Path to the annotation file (GTF/GFF3)
#' @param genome_fa The file path to genome fasta file.
#' @param outdir The path to directory to store the transcriptome as \code{transcript_assembly.fa}.
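#' @param extract_fn (optional) Function that takes the TxDb object built from the annotation and
#' returns a \code{GRangesList} of the regions to extract. If omitted, transcript sequences are
#' extracted directly from the TxDb object.
#' E.g. \code{function(txdb){GenomicFeatures::cdsBy(txdb, by="tx", use.names=TRUE)}}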
#' @return Path to the outputted transcriptome assembly
#'
#' @importFrom Biostrings readDNAStringSet writeXStringSet
Expand All @@ -125,7 +129,7 @@ find_isoform_flames <- function(annotation, genome_fa, genome_bam, outdir, confi
#' cat(readChar(fasta, nchars = 1e3))
#'
#' @export
annotation_to_fasta <- function(isoform_annotation, genome_fa, outdir) {
annotation_to_fasta <- function(isoform_annotation, genome_fa, outdir, extract_fn) {
# check if all the transcript in the annotation is stranded
annotation_d <- read.csv(isoform_annotation, sep = "\t",
header = FALSE, stringsAsFactors = FALSE,
Expand All @@ -146,10 +150,16 @@ annotation_to_fasta <- function(isoform_annotation, genome_fa, outdir) {

dna_string_set <- Biostrings::readDNAStringSet(genome_fa)
names(dna_string_set) <- gsub(" .*$", "", names(dna_string_set))
txdb <- GenomicFeatures::makeTxDbFromGFF(isoform_annotation)
if (missing(extract_fn)) {
txdb <- GenomicFeatures::makeTxDbFromGFF(isoform_annotation)
tr_string_set <- GenomicFeatures::extractTranscriptSeqs(dna_string_set, txdb,
use.names = TRUE)
} else {
extracted_grl<- extract_fn(txdb)
tr_string_set <- GenomicFeatures::extractTranscriptSeqs(dna_string_set, extracted_grl)
# additional arguments are allowed only when 'transcripts' is not a GRangesList object
}

tr_string_set <- GenomicFeatures::extractTranscriptSeqs(dna_string_set, txdb,
use.names = TRUE)
if (length(names(tr_string_set)) > length(unique(names(tr_string_set)))) {
cat("Duplicated transcript IDs present, removing ...")
tr_string_set <- tr_string_set[unique(names(tr_string_set))]
Expand Down
1 change: 0 additions & 1 deletion R/model_decay.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#' @importFrom S4Vectors split
#' @importFrom GenomicRanges strand
#' @importFrom BiocGenerics start end
#' @importFrom arrangements combinations
#'
#' @param annotation path to the GTF annotation file, or the parsed GenomicRanges
#' object.
Expand Down
9 changes: 7 additions & 2 deletions man/annotation_to_fasta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/flexiplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ EdlibAlignConfig edlibConf = {flank_max_editd, EDLIB_MODE_HW, EDLIB_TASK_PATH,

std::vector<long unsigned int> subpattern_ends;
subpattern_ends.resize(subpattern_lengths.size());
std::inclusive_scan(subpattern_lengths.begin(), subpattern_lengths.end(),
std::partial_sum(subpattern_lengths.begin(), subpattern_lengths.end(),
subpattern_ends.begin());

std::vector<int> read_to_subpatterns;
Expand Down

0 comments on commit 337beeb

Please sign in to comment.