Skip to content

Commit

Permalink
update the parser
Browse files Browse the repository at this point in the history
  • Loading branch information
youyupei committed Jun 22, 2024
1 parent 0d12d05 commit 5680b8c
Show file tree
Hide file tree
Showing 6 changed files with 370 additions and 28 deletions.
26 changes: 17 additions & 9 deletions blaze/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
import sys
import shutil
import time
from collections import namedtuple


Expand All @@ -25,27 +24,36 @@ def reverse_complement(seq):
[comp[base] if base in comp.keys() else base for base in seq]
return ''.join(letters)[::-1]

def err_msg(msg):
CRED = '\033[91m'
CEND = '\033[0m'
print(CRED + msg + CEND)

def warning_msg(msg, printit = True):
def err_msg(msg, printit = False):
CRED = '\033[91m'
CEND = '\033[0m'
if printit:
print(CRED + msg + CEND)
else:
return CRED + msg + CEND

def warning_msg(msg, printit = False):
CRED = '\033[93m'
CEND = '\033[0m'
if printit:
print(CRED + msg + CEND)
else:
return CRED + msg + CEND

def green_msg(msg, printit = True):
def green_msg(msg, printit = False):
CRED = '\033[92m'
CEND = '\033[0m'
if printit:
print(CRED + msg + CEND)
else:
return CRED + msg + CEND

def bold_text(text, printit = False):
if printit:
print(f"\033[1m{text}\033[0m")
else:
return f"\033[1m{text}\033[0m"

def sliding_window_sum(array, window) :
cum = np.cumsum(array)
return cum[window:] - cum[:-window]
Expand Down Expand Up @@ -257,7 +265,7 @@ def check_files_exist(file_list):
for fn in file_list:
if not os.path.exists(fn):
exit_code = 1
err_msg(f'Error: can not find {fn}')
err_msg(f'Error: can not find {fn}', printit=True)
if exit_code == 1:
sys.exit(1)
else:
Expand Down
31 changes: 19 additions & 12 deletions blaze/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
"""
from collections import defaultdict, Counter
from tqdm import tqdm
import multiprocessing as mp
import textwrap
import pandas as pd
import numpy as np
Expand All @@ -25,13 +24,13 @@
from fast_edit_distance import edit_distance
import logging


from blaze.parser import parse_arg
import blaze.helper as helper
from blaze.config import *
import blaze.polyT_adaptor_finder as polyT_adaptor_finder
import blaze.read_assignment as read_assignment


# setup logging
LOG_FORMAT = \
'(%(asctime)s) %(message)s'
Expand Down Expand Up @@ -135,15 +134,15 @@ def bc_search_qc_report(pass_count, args):
{total_read:,}
Reads with unambiguous polyT and adapter positions found:
{pass_count[0]+ pass_count[100]:,} ({(pass_count[0]+ pass_count[100])/total_read*100:.2f}% of all reads)
{pass_count[0]:,} in which all bases in the putative BC have Q>={args,minQ}
{pass_count[0]:,} in which all bases in the putative BC have Q>={args.minQ}
Failed Reads:
no polyT and adapter positions found:
{pass_count[1]:,} ({pass_count[1]/total_read*100:.2f}% of all reads)
polyT and adapter positions found in both end (fail to determine strand):
{pass_count[2]:,} ({pass_count[2]/total_read*100:.2f}% of all reads)
multiple polyT and adapter found in one end
{pass_count[10]:,} ({pass_count[10]/total_read*100:.2f}% of all reads)
-------------------------------------------------------------------------------\n'
-------------------------------------------------------------------------------\n
''')
return print_message

Expand Down Expand Up @@ -319,11 +318,17 @@ def print_logo(args):
'''))

def main():
args = parse_arg()

# Start running: Welcome logo
args = parse_arg()
print_logo(args)

# TMP: print all the arguments in args
#for arg in vars(args):
# print(f"{arg}: {getattr(args, arg)}")




######################
###### Getting putative barcodes
######################
Expand All @@ -346,17 +351,19 @@ def main():
else:
rst_df.to_csv(args.out_raw_bc_fn, mode='a', index=False, header=False)

helper.green_msg(f'Putative barcode table saved in {args.out_raw_bc_fn}')
helper.green_msg(f'Putative barcode table saved in {args.out_raw_bc_fn}', printit=True)

# ----------------------stats of the putative barcodes--------------------------

add_summary(bc_search_qc_report(raw_bc_pass_count, args),
args, write_mode='w')
args=args, write_mode='w')

######################
###### Whitelisting
######################
if args.do_whitelisting:
# get bc count dict (filtered by minQ)
dfs = pd.read_csv(args.out_raw_bc_fn, chunksize=1_000_000)
raw_bc_count = Counter()
for df in tqdm(dfs, desc = 'Counting high-quality putative BC', unit='M reads'):
raw_bc_count += Counter(df[
Expand All @@ -372,17 +379,17 @@ def main():
with open(args.out_emptydrop_fn, 'w') as f:
for k in ept_bc:
f.write(k+'\n')
helper.green_msg(f'Empty droplet barcode list saved as `{args.out_emptydrop_fn}`.')
helper.green_msg(f'Empty droplet barcode list saved as `{args.out_emptydrop_fn}`.', printit=True)
# write to summary
add_summary(f'\nIdentified # of cells: {len(bc_whitelist)}\n', args, write_mode='a')

except Exception as e:
logger.exception(e)
helper.err_msg(
"Error: Failed to get whitelist. Please check the input files and settings."
"Error: Failed to get whitelist. Please check the input files and settings." , printit=True
)
else:
helper.green_msg(f'Whitelist saved as `{args.out_whitelist_fn}`!')
helper.green_msg(f'Whitelist saved as `{args.out_whitelist_fn}`!', printit=True)


######################
Expand All @@ -391,7 +398,7 @@ def main():
if args.do_demultiplexing:
logger.info("Assigning reads to whitelist.\n")
# write to fastq
demul_count_tot, count_tot = read_assignment.assign_read(args)
demul_count_tot, count_tot = read_assignment.assign_read(args=args)
# write to summary
add_summary(f'\nTotal reads: {count_tot}'
f'\nTotal reads in cells: {demul_count_tot} ({demul_count_tot/count_tot*100:.2f}%)', args, write_mode='a')
Expand Down
Loading

0 comments on commit 5680b8c

Please sign in to comment.