-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 20b07b8
Showing
684 changed files
with
41,702 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import os | ||
import pandas as pd | ||
import sys | ||
from qc.utils import FormatDataframeSamples | ||
from qc.qc_pipeline import qc_pipeline | ||
from tkinter import filedialog, Tk, Button, ttk, Entry, StringVar, messagebox, Label | ||
|
||
def import_list_ms_runs():
    """Ask the user for a run-list file, load it and show it in the run table.

    The file is read with pandas using ``,`` as separator for ``.csv`` files
    and a tab for anything else, then passed through
    ``FormatDataframeSamples``. On success the module-level ``df`` holds the
    formatted table, the output path is set to the directory of the chosen
    file, the "User ions file" label reflects whether ``User-Ions.csv``
    exists in that directory, and the tree view is refilled.

    Returns early (leaving the tree view and output path untouched) when the
    dialog is cancelled or the formatted table is empty.
    """
    global df
    global filepath
    global userIonsFileLabel
    csv_file_path = filedialog.askopenfilename()
    # The dialog returns an empty value when the user cancels; without this
    # guard the empty path would be handed to pd.read_csv and raise.
    if not csv_file_path:
        return
    print(csv_file_path)

    # Case-insensitive so that "RUNS.CSV" is also read as comma-separated.
    separator = "," if csv_file_path.lower().endswith('.csv') else "\t"
    base_dir = os.path.dirname(csv_file_path)
    df = pd.read_csv(csv_file_path, sep=separator)
    df = FormatDataframeSamples(df, basePathCsvRuns=base_dir)

    if len(df) == 0:
        return

    filepath.set(base_dir)
    if os.path.exists(os.path.join(filepath.get(), "User-Ions.csv")):
        userIonsFileLabel.set("User ions file: DETECTED User-Ions.csv")
    else:
        userIonsFileLabel.set("User ions file: NONE")

    # Clear the treeview list items before showing the new table.
    for item in tree.get_children():
        tree.delete(item)

    tree["columns"] = list(df.columns)
    for col in df.columns:
        tree.heading(col, text=col)
    for idx, (_, row) in enumerate(df.iterrows()):
        # Even rows carry the "evenrow" tag (used for row striping).
        if idx % 2 == 0:
            tree.insert("", "end", text=str(idx), values=list(row), tags=("evenrow",))
        else:
            tree.insert("", "end", text=str(idx), values=list(row))
|
||
def import_list_ms_runs_clipboard():
    """Build the run table from file paths pasted on the clipboard.

    Each non-blank clipboard line is treated as one path: its base name
    becomes ``MSRUN`` and the remaining leading part becomes ``MSRUNPATH``.
    The table is passed through ``FormatDataframeSamples``, stored in the
    module-level ``df`` and shown in the tree view. The output path is set
    to the ``MSRUNPATH`` of the first run and the "User ions file" label is
    updated accordingly.

    Returns early when the clipboard holds no usable text or when the
    formatted table is empty.
    """
    global df
    global filepath
    global userIonsFileLabel
    # Local import: TclError is not part of the file-level tkinter import.
    from tkinter import TclError

    try:
        cb = root.clipboard_get()
    except TclError:
        # Raised when the clipboard is empty or does not contain text.
        return

    runs = []
    paths = []
    # splitlines() handles both "\n" and "\r\n"; blank lines (including the
    # one produced by a trailing newline) are skipped so they do not become
    # empty run names.
    for line in cb.splitlines():
        item = line.strip()
        if not item:
            continue
        runx = os.path.basename(item)
        runs.append(runx)
        paths.append(item.removesuffix(runx))

    if not runs:
        return

    df = pd.DataFrame({"MSRUN": runs, "MSRUNPATH": paths})
    df = FormatDataframeSamples(df)
    # Same guard as the file-based importer: nothing to show or to take the
    # output path from.
    if len(df) == 0:
        return

    # Clear the treeview list items before showing the new table.
    for item in tree.get_children():
        tree.delete(item)

    tree["columns"] = list(df.columns)
    for col in df.columns:
        tree.heading(col, text=col)
    for idx, (_, row) in enumerate(df.iterrows()):
        if idx % 2 == 0:
            tree.insert("", "end", text=str(idx), values=list(row), tags=("evenrow",))
        else:
            tree.insert("", "end", text=str(idx), values=list(row))

    # Take the path of the first run as the result path. Positional access
    # is used so this does not depend on the index labels of the formatted
    # table.
    filepath.set(df["MSRUNPATH"].iloc[0])
    if os.path.exists(os.path.join(filepath.get(), "User-Ions.csv")):
        userIonsFileLabel.set("User ions file: DETECTED User-Ions.csv")
    else:
        userIonsFileLabel.set("User ions file: NONE")
|
||
def call_backend_process():
    """Run the QC pipeline on the currently imported list of MS runs.

    Shows an error dialog and returns when no runs have been imported.
    Otherwise calls ``qc_pipeline`` with the run table and the output path,
    additionally passing ``<output path>/config.toml`` when that file exists.
    """
    global df
    global filepath
    # "df" only exists as a module global once an import has succeeded, so
    # look it up defensively instead of risking a NameError.
    runs = globals().get("df")
    if runs is None or len(runs) == 0:
        # showerror takes (title, message); passing the text as the only
        # positional argument would put it in the title bar with an empty body.
        messagebox.showerror("No MS runs", "Please import a list of MS runs.")
        return

    output_dir = filepath.get()
    config_path = output_dir + "/config.toml"
    if os.path.exists(config_path):
        qc_pipeline(runs, output_dir, config_path)
    else:
        qc_pipeline(runs, output_dir)
|
||
|
||
if __name__ == "__main__":
    import multiprocessing
    from tkinter import TclError

    # Needed so multiprocessing works when the program has been frozen into
    # an executable; must be called right after the __main__ guard.
    multiprocessing.freeze_support()

    # No run list has been imported yet. The button callbacks declare "df"
    # as a global, so define it up front.
    df = None

    root = Tk()
    root.geometry('800x600')
    root.title("IonToolPack v1 | PeakQC")
    # NOTE(review): presumably meant to reuse the icon embedded in the
    # frozen executable - confirm. The icon is cosmetic, so a failure to
    # load it must not prevent the window from opening.
    try:
        root.iconbitmap(sys.executable)
    except TclError:
        pass

    # Widgets/variables shared with the callbacks through module globals.
    tree = ttk.Treeview(root, height=20, show="headings")
    filepath = StringVar()
    userIonsFileLabel = StringVar()
    userIonsFileLabel.set("User ions file: NONE")

    Button(root, text='Import list of MS runs (.csv, .txt)', command=import_list_ms_runs).pack()
    Button(root, text='Paste MS runs from clipboard', command=import_list_ms_runs_clipboard).pack()
    tree.pack(fill="both")

    Label(root, text="Output path:").pack()  # label for the output path
    Entry(root, textvariable=filepath).pack()  # editable output path
    Label(root, textvariable=userIonsFileLabel).pack()  # user ions file status
    Button(root, text='Process', command=call_backend_process).pack()

    root.mainloop()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
BSD 2-Clause License | ||
|
||
Copyright 2022 Battelle Memorial Institute | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
1. Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
# IonToolPack | PeakQC | ||
|
||
IonToolPack is a software suite that houses tools for mass spectrometry. The first one is PeakQC, a software tool for automated quality control (QC) of mass spectrometry (MS) data that is omics-agnostic (works for any ion type, e.g., metabolomics, lipidomics, proteomics), supports various instrument platforms and acquisition modes, and has a simple graphical user interface. | ||
|
||
## Usage | ||
1. Download the latest version (Release section, right panel) and uncompress it | ||
2. Double click IonToolPack.exe | ||
3. Import raw MS files and click “Process” | ||
4. See more details and examples in PeakQC_User-guide_*.pdf | ||
|
||
## MS data supported | ||
Supported formats include Agilent 'd', Bruker 'd' (improvements in progress), Thermo '.raw', and mzML. The following types of MS acquisition methods are supported: | ||
* LC-MS | ||
* LC-IMS-MS | ||
* With/without fragmentation spectra in DDA or DIA mode | ||
* Direct infusion | ||
|
||
## Contact | ||
|
||
aivett.bilbao@pnnl.gov | ||
|
||
## References | ||
|
||
If you use this tool or any portions of this code, please cite: | ||
* Harrison et al. "PeakQC: A Software Tool for Omics-Agnostic Automated Quality Control of Mass Spectrometry Data". Journal of the American Society for Mass Spectrometry 2024 https://doi.org/10.1021/jasms.4c00146. | ||
* Bilbao et al. "MZA: A Data Conversion Tool to Facilitate Software Development and Artificial Intelligence Research in Multidimensional Mass Spectrometry". Journal of Proteome Research 2023 https://doi.org/10.1021/acs.jproteome.2c00313. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Configuration file | ||
|
||
# For overlaid figures, Half Window size of the view (+- center): | ||
MZVIEWHALFWINDOW = 0.7 | ||
RTVIEWHALFWINDOW = 1.5 | ||
ATVIEWHALFWINDOW = 2 | ||
|
||
# Mass tolerance for chromatogram extraction, Half Window m/z value (+- center or m/z target): | ||
MZXICHALFWINDOW = 0.01 | ||
|
||
# Error thresholds, absolute value: | ||
MZERRORPPM = 15 | ||
RTERROR = 0.3 | ||
ATERROR = 0.1 | ||
ABUNDANCEERROR = 30 # Percentage absolute error, a percentage of the mean of the ion abundance applied as a threshold to report QC ions outside tolerances | ||
|
||
AutoTrackedIonsTopN = 4 # Number of auto-tracked ions to detect per sample group | ||
MinIntensityPresencePercentage = 80 # Intensity threshold presence/absence | ||
|
||
MinMzDistDetectCentroidMS = 0.0005 # If distance between 2 consecutive points from the max intensity peak is smaller than this value then it is considered profile mode spectrum | ||
|
||
FigureLegendMaxNumberLines = 15 | ||
|
||
# MZA conversion: | ||
MinIntensityMza = 20 | ||
|
||
# time-vs-mz images: | ||
TimeVsMzImageMinIntensityPercentage = 10 | ||
TimeVsMzImageMaxIntensityCeilingPercentage = 70 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from qc.ion_batch import GetHighResCoordinates | ||
|
||
def DetectTopmostIons(dfions, dfruns, topIons=4, minMzDistDetectCentroidMS=0.005):
    """Select representative ions per sample group and refine their coordinates.

    For every ``LABELSAMPLEGROUP`` in ``dfions`` the RT axis is split into
    ``topIons`` equal-width bins. In each non-empty bin the row with the
    highest ``FREQ`` (ties broken by highest ``INTENSITY``) is selected,
    skipping integer m/z values already selected earlier. Each selected ion
    is then refined with ``GetHighResCoordinates`` on the ``.mza`` file of
    the first run of its sample group.

    Args:
        dfions: table with at least the columns ``MZ``, ``RT``, ``FREQ``,
            ``INTENSITY`` and ``LABELSAMPLEGROUP``. It is not modified.
        dfruns: table with at least ``LABELSAMPLEGROUP`` and ``MZAPATH``
            (path without the ``.mza`` extension). It is not modified.
        topIons: number of RT bins, i.e. the maximum number of ions selected
            per sample group.
        minMzDistDetectCentroidMS: forwarded to ``GetHighResCoordinates`` as
            ``minMzDistCentroid``.
            NOTE(review): the default here (0.005) differs from the
            ``MinMzDistDetectCentroidMS`` value in the shipped config file
            (0.0005) - confirm which is intended.

    Returns:
        A DataFrame with the columns of ``dfions`` plus ``MOLECULE``
        ("Ion1", "Ion2", ...), containing only ions whose refined ``MZ`` and
        ``RT`` are both positive. Empty (with the same columns) when no ion
        could be selected.
    """
    dfions = dfions.copy()
    dfions["rtRegion"] = 0
    # Selection works at unit m/z resolution.
    dfions["MZ"] = dfions["MZ"].astype(int)
    Nrt = int(topIons)  # take more blocks across RT because the mass is usually very stable and good
    selectedMzs = []  # to keep only the first MZ, each ion will be extracted from all runs
    selected_frames = []
    for label in dfions['LABELSAMPLEGROUP'].unique():
        in_group = dfions["LABELSAMPLEGROUP"] == label
        # Explicit copy: rows are flagged below, and writing into a filtered
        # view of dfions would be a chained assignment.
        df = dfions[in_group & (~dfions["MZ"].isin(selectedMzs))].copy()
        if df.empty:
            continue
        # Step 1: Partition the space
        rtbins = pd.cut(df['RT'], bins=Nrt, labels=False)
        # Step 2: Select rows with maximum frequency and intensity in each zone
        rtRegion = 1
        for rt_bin in range(Nrt):
            zone_rows = df[(rtbins == rt_bin) & (~df["MZ"].isin(selectedMzs))]
            if zone_rows.empty:
                continue
            zone_rows = zone_rows[zone_rows['FREQ'] == zone_rows['FREQ'].max()]
            best = zone_rows['INTENSITY'].idxmax()
            df.at[best, 'rtRegion'] = rtRegion
            rtRegion += 1
            selectedMzs.append(df.at[best, "MZ"])
        selected_frames.append(df[df["rtRegion"] > 0])

    if not selected_frames:
        # Nothing selected (e.g. empty input): return an empty table with the
        # documented columns instead of failing on a column-less frame.
        return (
            dfions.head(0)
            .drop(columns=["rtRegion"])
            .assign(MOLECULE=pd.Series(dtype=object))
        )

    dftopmost = pd.concat(selected_frames)
    dftopmost.reset_index(inplace=True, drop=True)

    # keep only the first MS run for each LABELSAMPLEGROUP
    dfruns = dfruns.drop_duplicates(['LABELSAMPLEGROUP'], keep='first')
    dfruns = dfruns.reset_index(drop=True)

    # Refined coordinates are collected and assigned as whole columns
    # afterwards, so float results are never written row by row into the
    # integer-typed MZ column.
    refined_mz = []
    refined_rt = []
    for k in dftopmost.index:
        ionmz = dftopmost["MZ"][k]
        ionrt = dftopmost["RT"][k] / 10  # <- ToDo: correct scaling in PCA.py
        group = dftopmost["LABELSAMPLEGROUP"][k]
        mzaFile = dfruns.loc[dfruns["LABELSAMPLEGROUP"] == group, "MZAPATH"].iloc[0] + ".mza"
        # these tolerances must be kept at unit resolution
        ionmz, ionrt = GetHighResCoordinates(
            mzaFile, ionmz, ionrt, rtrange=1, mzrange=1,
            minMzDistCentroid=minMzDistDetectCentroidMS,
        )
        refined_mz.append(ionmz)
        refined_rt.append(ionrt)
    dftopmost["MZ"] = refined_mz
    dftopmost["RT"] = refined_rt

    # Keep only ions with positive refined coordinates.
    dftopmost = dftopmost[(dftopmost["MZ"] > 0) & (dftopmost["RT"] > 0)]
    dftopmost = dftopmost.sort_values(
        by=["LABELSAMPLEGROUP", "rtRegion", "FREQ", "INTENSITY"],
        ascending=[True, True, False, False],
    )
    dftopmost = dftopmost.drop_duplicates(subset=["LABELSAMPLEGROUP", "rtRegion"], keep='first')
    dftopmost = dftopmost.drop(columns=["rtRegion"])
    dftopmost = dftopmost.reset_index(drop=True)
    dftopmost["MOLECULE"] = [f"Ion{k + 1}" for k in dftopmost.index]
    return dftopmost
Oops, something went wrong.