From a6b16392ff5536b6588dc0b14228a790b0d33866 Mon Sep 17 00:00:00 2001 From: MICHAEL SHORTREED Date: Thu, 18 Nov 2021 12:30:51 -0600 Subject: [PATCH 1/7] correct Within calculation --- MassSpectrometry/MzSpectra/SpectralSimilarity.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MassSpectrometry/MzSpectra/SpectralSimilarity.cs b/MassSpectrometry/MzSpectra/SpectralSimilarity.cs index e9600bfdd..4fcc00e77 100644 --- a/MassSpectrometry/MzSpectra/SpectralSimilarity.cs +++ b/MassSpectrometry/MzSpectra/SpectralSimilarity.cs @@ -14,7 +14,7 @@ public SpectralSimilarity(MzSpectrum primary, MzSpectrum secondary, SpectrumNorm primaryXArray = primary.XArray; secondaryYarray = Normalize(secondary.YArray, scheme); secondaryXArray = secondary.XArray; - localTolerance = toleranceInPpm / 1000000.0; + ppmTolerance = toleranceInPpm; _intensityPairs = IntensityPairs(allPeaks); } @@ -24,7 +24,7 @@ public SpectralSimilarity(MzSpectrum primary, double[] secondaryX, double[] seco primaryXArray = primary.XArray; secondaryYarray = Normalize(secondaryY, scheme); secondaryXArray = secondaryX; - localTolerance = toleranceInPpm / 1000000.0; + ppmTolerance = toleranceInPpm; _intensityPairs = IntensityPairs(allPeaks); } @@ -32,7 +32,7 @@ public SpectralSimilarity(MzSpectrum primary, double[] secondaryX, double[] seco public double[] primaryXArray { get; private set; } public double[] secondaryYarray { get; private set; } public double[] secondaryXArray { get; private set; } - private double localTolerance; + private double ppmTolerance; private List<(double, double)> _intensityPairs = new List<(double, double)>(); public List<(double, double)> intensityPairs @@ -236,7 +236,7 @@ public double DotProduct() private bool Within(double mz1, double mz2) { - return (Math.Abs(mz1 - mz2) < localTolerance); + return (Math.Abs(mz1 - mz2)/mz1*1000000.0 < ppmTolerance); } public enum SpectrumNormalizationScheme From fa4da8bb9e174a4343c6b530b47172f1449c730e Mon Sep 17 00:00:00 2001 From: MICHAEL SHORTREED Date: Thu, 18 Nov 2021 12:42:10 -0600 Subject: [PATCH 2/7] update unit tests --- Test/TestSpectralSimilarity.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Test/TestSpectralSimilarity.cs b/Test/TestSpectralSimilarity.cs index 356158e04..ebc19e9af 100644 --- a/Test/TestSpectralSimilarity.cs +++ b/Test/TestSpectralSimilarity.cs @@ -117,10 +117,10 @@ public void TestAllSpectrumSimilarities() //explore bounds of binary search primary = new MzSpectrum(new double[] { 1, 2, 3, 4 }, new double[] { 1, 2, 3, 4 }, false); - secondary = new MzSpectrum(new double[] { 1.000009, 1.99999, 3.00004, 3.99995 }, new double[] { 1, 2, 3, 4 }, false); + secondary = new MzSpectrum(new double[] { 1.000011, 1.99997, 3.000031, 3.99995 }, new double[] { 1, 2, 3, 4 }, false); s = new SpectralSimilarity(primary, secondary, SpectralSimilarity.SpectrumNormalizationScheme.spectrumSum, ppmTolerance, true); - Assert.AreEqual(7, s.intensityPairs.Count); + Assert.AreEqual(8, s.intensityPairs.Count); //Test alternate constructor primary = new MzSpectrum(new double[] { 1, 2, 3 }, new double[] { 2, 4, 6 }, false); From 892fa451204f0b54f3e54301a0bc5b250410f48a Mon Sep 17 00:00:00 2001 From: MICHAEL SHORTREED Date: Fri, 18 Feb 2022 09:57:06 -0600 Subject: [PATCH 3/7] this is the spot --- FlashLFQ/FlashLfqEngine.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FlashLFQ/FlashLfqEngine.cs b/FlashLFQ/FlashLfqEngine.cs index 0bc9d5818..6037e2098 100644 --- a/FlashLFQ/FlashLfqEngine.cs +++ b/FlashLFQ/FlashLfqEngine.cs @@ -914,6 +914,8 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile) } } + //here is where we get the cosine similarity. + _results.Peaks[idAcceptorFile].Add(best); } From b4ad2317ed6e7df53d0d1956faa79692a669c95b Mon Sep 17 00:00:00 2001 From: Michael Shortreed Date: Thu, 29 Jun 2023 11:09:39 -0500 Subject: [PATCH 4/7] add space --- mzLib/Test/TestBayesianEstimation.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/mzLib/Test/TestBayesianEstimation.cs b/mzLib/Test/TestBayesianEstimation.cs index e7045e5a6..05c83a0e9 100644 --- a/mzLib/Test/TestBayesianEstimation.cs +++ b/mzLib/Test/TestBayesianEstimation.cs @@ -95,6 +95,7 @@ public static void TestOneSampleBayesianEstimation() Assert.That(Math.Round(highestDensityInterval.hdi_end, 3) == 1.062); } + [Test] /// /// Bayesian estimation of the difference in means between two samples. From adefa92175bc3d250434aa47d80ef7f5e388caf4 Mon Sep 17 00:00:00 2001 From: Michael Shortreed Date: Wed, 14 Feb 2024 11:00:22 -0600 Subject: [PATCH 5/7] unused using --- mzLib/mzPlot/Annotations/PlotTextAnnotation.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs b/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs index b98f7838e..24a9552e6 100644 --- a/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs +++ b/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs @@ -1,6 +1,5 @@ using OxyPlot; using OxyPlot.Annotations; -using System; using System.Collections.Generic; using System.Text; From 692ad85297eb36d0ff799f08cb9dcbd7c3c1db2f Mon Sep 17 00:00:00 2001 From: Michael Shortreed Date: Wed, 14 Feb 2024 11:00:48 -0600 Subject: [PATCH 6/7] vnc --- mzLib/mzPlot/Annotations/PlotTextAnnotation.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs b/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs index 24a9552e6..b98f7838e 100644 --- a/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs +++ b/mzLib/mzPlot/Annotations/PlotTextAnnotation.cs @@ -1,5 +1,6 @@ using OxyPlot; using OxyPlot.Annotations; +using System; using System.Collections.Generic; using System.Text; From 65a0c48ede7eb435408ed50496dc0511f4b7d0a2 Mon Sep 17 00:00:00 2001 From: Michael Shortreed Date: Thu, 23 May 2024 12:54:19 -0500 Subject: [PATCH 7/7] scripts --- mzLib/Test/TestSpectra.cs | 817 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 817 insertions(+) diff --git a/mzLib/Test/TestSpectra.cs b/mzLib/Test/TestSpectra.cs index 8a8b0eb15..450c50387 100644 --- a/mzLib/Test/TestSpectra.cs +++ b/mzLib/Test/TestSpectra.cs @@ -19,9 +19,13 @@ using MassSpectrometry; using MzLibUtil; using NUnit.Framework; +using Proteomics.PSM; +using Readers; using System; using System.Collections.Generic; +using System.IO; using System.Linq; +using System.Text.RegularExpressions; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test @@ -55,6 +59,819 @@ public void Setup() _mzSpectrumA = new MzSpectrum(mz, intensities, false); } + + + [Test] + public static void MoreJunk() + { + List quantResults = File.ReadAllLines(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllQuantifiedPeptidesMarkovich.tsv").ToList(); + List fullSequences = new List(); + foreach (var line in quantResults) + { + string[] fields = line.Split('\t'); + fullSequences.Add(fields[0]); + } + + string psmFilePath = + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllPeptidesMarkovich.psmtsv"; + List parsedPsms = SpectrumMatchTsvReader.ReadPsmTsv(psmFilePath, out var warnings); + + List quantPeptidesFoundInAllPeptides = new List(); + foreach (var psm in parsedPsms) + { + bool containsAny = fullSequences.Any(s => psm.FullSequence.Contains(s)); + if (containsAny) + { + fullSequences.Remove(psm.FullSequence); + quantPeptidesFoundInAllPeptides.Add(psm); + } + + if (fullSequences.Count == 0) + { + break; + } + } + + List myOut = new List(); + //myOut.Add(parsedPsms[0].ToString()); + foreach (var psm in quantPeptidesFoundInAllPeptides) + { + + myOut.Add(psm.ToString()); + + } + + File.WriteAllLines(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\psmsFoundInQuantMarkovich.txt", myOut); + + } + [Test] + public static void Junk3() + { + + string psmFilePath = + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllPeptidesMarkovich.psmtsv"; + List parsedPsms = SpectrumMatchTsvReader.ReadPsmTsv(psmFilePath, out var warnings); + parsedPsms = parsedPsms.Where(p => p.QValue < .01).ToList(); + parsedPsms = parsedPsms.Where(p => p.DecoyContamTarget.Contains("T")).ToList(); + + List interestingMods = new List { "Y[Common Biological:Phosphorylation on Y]", "S[Common Biological:Phosphorylation on S]", "T[Common Biological:Phosphorylation on T]" }; + + Dictionary<(string, int), List> genePositionPsm = new Dictionary<(string, int), List>(); + + foreach (var psm in parsedPsms) + { + List foundMods = interestingMods.Where(s => psm.FullSequence.Contains(s)).ToList(); + if (foundMods.Any()) + { + string sequence = psm.FullSequence; + foreach (var mod in foundMods) + { + string firstCharacter = mod.Substring(0, 1); + if (firstCharacter == "Y" || firstCharacter == "S" || firstCharacter == "T") + { + sequence = sequence.Replace(mod, firstCharacter.ToLowerInvariant()); + } + } + sequence = Regex.Replace(sequence, "\\[(.*?)\\]", ""); + List lowercaseLetterPositions = new List(); + + for (int i = 0; i < sequence.Length; i++) + { + if (char.IsLower(sequence[i])) + { + lowercaseLetterPositions.Add(i); + } + } + string firstAndLastAminoAcidPositionInProtein = psm.StartAndEndResiduesInProtein.Split('|')[0]; + firstAndLastAminoAcidPositionInProtein = firstAndLastAminoAcidPositionInProtein.Substring(1, firstAndLastAminoAcidPositionInProtein.Length - 2); + firstAndLastAminoAcidPositionInProtein = firstAndLastAminoAcidPositionInProtein.Replace(" to ", "\t"); + int[] startEnd = firstAndLastAminoAcidPositionInProtein.Split('\t').Select(int.Parse).ToArray(); + lowercaseLetterPositions = lowercaseLetterPositions.Select(s => s + startEnd[0]).ToList(); + + string allGenesInPsm = psm.GeneName; + string[] genes = allGenesInPsm.Split('|'); + string firstGene = genes[0].Split(':')[1]; + + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPsm.ContainsKey((firstGene, position))) + { + genePositionPsm[(firstGene, position)].Add(psm); + + } + else + { + genePositionPsm.Add((firstGene, position), new List { psm }); + } + } + } + } + + List myOut = new List(); + myOut.Add("position" + "\t" + "protein accession" + "\t" + "gene" + "\t" + "full sequence"); + + foreach (var kvp in genePositionPsm) + { + if (kvp.Value.Count > 1) + { + foreach (var psm in kvp.Value) + { + myOut.Add(kvp.Key.Item2 + "\t" + psm.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm.FullSequence); + } + } + } + + File.WriteAllLines(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\phosphoPeptidesMarkovich.txt", myOut); + + } + + [Test] + public static void Junk4() + { + Dictionary<(string,string),string> baseSequencefullSequencQvaluePepQvalueforPSMs = new Dictionary<(string, string), string>(); + + using (StreamReader sr = new StreamReader(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllPSMsMarkovich.psmtsv")) + { + bool continueReading = true; + string line; + while ((line = sr.ReadLine()) != null && continueReading) + { + string[] fields = line.Split('\t'); + string baseSequence = fields[12]; + string fullSequence = fields[13]; + string targetDecoyContam = fields[38]; + string qValue = fields[50]; + string pepQValue = fields[55]; + if (targetDecoyContam == "T" && double.TryParse(qValue, out double qValueDouble)) + { + if (qValueDouble < 0.01 &&!baseSequencefullSequencQvaluePepQvalueforPSMs.ContainsKey((baseSequence,fullSequence))) + { + baseSequencefullSequencQvaluePepQvalueforPSMs.Add((baseSequence, fullSequence), qValue + "\t" + pepQValue); + } + + } + + if (double.TryParse(qValue, out double qValueDouble2)) + { + if (qValueDouble2 > 0.01) + { + continueReading = false; + } + } + } + } + + string psmFilePath = + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllPeptidesMarkovich.psmtsv"; + List parsedPeptides = SpectrumMatchTsvReader.ReadPsmTsv(psmFilePath, out var warnings); + parsedPeptides = parsedPeptides.Where(p => p.DecoyContamTarget.Contains("T")).ToList(); + + List interestingMods = new List { "S[Common Biological:Phosphorylation on S]", "T[Common Biological:Phosphorylation on T]", "S[Less Common:Dehydroalanine on S]", "C[Less Common:Dehydroalanine on C]", + "S[Custom:Homocys on S]", "C[Custom:Homocys on C]", "T[Custom:Homocys on T]", "T[Less Common:Dehydrobutyrine on T]", "S[Custom:DTT on S]", "C[Custom:DTT on C]", "C[Custom:DTT on T]", "T[Custom:Glutathione on T]", + "S[Custom:Glutathione on S]", "C[Custom:Glutathione on C]", "S[Custom:TCEP on S]", "T[Custom:TCEP on T]", "C[Custom:TCEP on C]" }; + + Dictionary<(string, int), List> genePositionPeptides = new Dictionary<(string, int), List>(); + Dictionary<(string, int), List> genePositionMod = new Dictionary<(string, int), List>(); + Dictionary> noInterestingModsPeptideValues = new Dictionary>(); + + foreach (var peptide in parsedPeptides) + { + if (baseSequencefullSequencQvaluePepQvalueforPSMs.ContainsKey((peptide.BaseSeq,peptide.FullSequence))) + { + List foundMods = interestingMods.Where(s => peptide.FullSequence.Contains(s)).ToList(); + if (foundMods.Any()) + { + foreach (var mod in foundMods) + { + string sequence = peptide.FullSequence; + string firstCharacter = mod.Substring(0, 1); + if (firstCharacter == "Y" || firstCharacter == "S" || firstCharacter == "T" || + firstCharacter == "C") + { + sequence = sequence.Replace(mod, firstCharacter.ToLowerInvariant()); + } + + //eliminate the remaning mods + while (sequence.Contains("[") && sequence.Contains("]")) + { + int firstOpenBracket = sequence.IndexOf('['); + int firstCloseBracket = sequence.IndexOf(']', firstOpenBracket); + if (firstCloseBracket != -1) + { + sequence = sequence.Remove(firstOpenBracket, firstCloseBracket - firstOpenBracket + 1); + } + else + { + break; + } + } + + List lowercaseLetterPositions = new List(); + + for (int i = 0; i < sequence.Length; i++) + { + if (char.IsLower(sequence[i])) + { + lowercaseLetterPositions.Add(i); + } + } + + string firstAndLastAminoAcidPositionInProtein = + peptide.StartAndEndResiduesInProtein.Split('|')[0]; + firstAndLastAminoAcidPositionInProtein = + firstAndLastAminoAcidPositionInProtein.Substring(1, + firstAndLastAminoAcidPositionInProtein.Length - 2); + firstAndLastAminoAcidPositionInProtein = + firstAndLastAminoAcidPositionInProtein.Replace(" to ", "\t"); + int[] startEnd = firstAndLastAminoAcidPositionInProtein.Split('\t').Select(int.Parse) + .ToArray(); + lowercaseLetterPositions = lowercaseLetterPositions.Select(s => s + startEnd[0]).ToList(); + + string allGenesInPsm = peptide.GeneName; + string firstGene = ""; + if (allGenesInPsm.Contains("|")) + { + string[] genes = allGenesInPsm.Split('|'); + + if (genes[0].Contains(":")) + { + firstGene = genes[0].Split(':')[1]; + + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPeptides.ContainsKey((firstGene, position))) + { + genePositionPeptides[(firstGene, position)].Add(peptide); + genePositionMod[(firstGene, position)] + .Add(mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue); + + } + else + { + genePositionPeptides.Add((firstGene, position), new List { peptide }); + genePositionMod.Add((firstGene, position), + new List { mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue }); + } + } + } + else + { + firstGene = genes[0]; + if (genes[0].Contains(":")) + { + firstGene = genes[0].Split(':')[1]; + } + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPeptides.ContainsKey((firstGene, position))) + { + genePositionPeptides[(firstGene, position)].Add(peptide); + genePositionMod[(firstGene, position)] + .Add(mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue); + + } + else + { + genePositionPeptides.Add((firstGene, position), new List { peptide }); + genePositionMod.Add((firstGene, position), + new List { mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue }); + } + } + } + } + else + { + firstGene = peptide.GeneName; + if (peptide.GeneName.Contains(":")) + { + firstGene = peptide.GeneName.Split(':')[1]; + } + + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPeptides.ContainsKey((firstGene, position))) + { + genePositionPeptides[(firstGene, position)].Add(peptide); + genePositionMod[(firstGene, position)] + .Add(mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue); + + } + else + { + genePositionPeptides.Add((firstGene, position), new List { peptide }); + genePositionMod.Add((firstGene, position), + new List { mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue }); + } + } + } + } + } + else + { + if (noInterestingModsPeptideValues.ContainsKey(peptide.BaseSeq)) + { + noInterestingModsPeptideValues[peptide.BaseSeq].Add(peptide); + } + else + { + noInterestingModsPeptideValues.Add(peptide.BaseSeq, new List { peptide }); + } + + } + } + } + + List myOut = new List(); + myOut.Add("position" + "\t" + "protein accession" + "\t" + "gene" + "\t" + "base sequence" + "\t" + "full sequence" + "\t" + "modification" + "\t" + "Peptide Q-value" + "\t" + "Peptide PEP Q-Value" + "\t" + "PSM Q-value" + "\t" + "PSM PEP Q-Value"); + + foreach (var kvp in genePositionPeptides) + { + if (kvp.Value.Count > 1) + { + foreach (var psm in kvp.Value) + { + int index = kvp.Value.IndexOf(psm); + if(baseSequencefullSequencQvaluePepQvalueforPSMs.ContainsKey((psm.BaseSeq, psm.FullSequence))) + { + myOut.Add(kvp.Key.Item2 + "\t" + psm.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm.BaseSeq + "\t" + psm.FullSequence + "\t" + genePositionMod[kvp.Key][index] + "\t" + baseSequencefullSequencQvaluePepQvalueforPSMs[(psm.BaseSeq, psm.FullSequence)]); + if (noInterestingModsPeptideValues.ContainsKey(psm.BaseSeq)) + { + foreach (var psm2 in noInterestingModsPeptideValues[psm.BaseSeq]) + { + myOut.Add(kvp.Key.Item2 + "\t" + psm2.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm2.BaseSeq + "\t" + psm2.FullSequence + "\t" + "No Modifications of Interest" + "\t" + psm2.QValue + "\t" + psm2.PEP_QValue + "\t" + baseSequencefullSequencQvaluePepQvalueforPSMs[(psm2.BaseSeq, psm2.FullSequence)]); + } + noInterestingModsPeptideValues.Remove(psm.BaseSeq); + } + } + else + { + myOut.Add(kvp.Key.Item2 + "\t" + psm.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm.BaseSeq + "\t" + psm.FullSequence + "\t" + genePositionMod[kvp.Key][index]); + if (noInterestingModsPeptideValues.ContainsKey(psm.BaseSeq)) + { + foreach (var psm2 in noInterestingModsPeptideValues[psm.BaseSeq]) + { + string psmQvalues = baseSequencefullSequencQvaluePepQvalueforPSMs[(psm2.BaseSeq, psm2.FullSequence)]; + myOut.Add(kvp.Key.Item2 + "\t" + psm2.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm2.BaseSeq + "\t" + psm2.FullSequence + "\t" + "No Modifications of Interest" + "\t" + psm2.QValue + "\t" + psm2.PEP_QValue + "\t" + psmQvalues); + + } + noInterestingModsPeptideValues.Remove(psm.BaseSeq); + } + } + + } + } + } + + File.WriteAllLines(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\modPeptidesMarkovichWithPosition.txt", myOut); + + } + + + + [Test] + public static void Junk5() + { + List interestingMods = new List { "S[Common Biological:Phosphorylation on S]", "T[Common Biological:Phosphorylation on T]", "S[Less Common:Dehydroalanine on S]", "C[Less Common:Dehydroalanine on C]", + "S[Custom:Homocys on S]", "C[Custom:Homocys on C]", "T[Custom:Homocys on T]", "T[Less Common:Dehydrobutyrine on T]", "S[Custom:DTT on S]", "C[Custom:DTT on C]", "C[Custom:DTT on T]", "T[Custom:Glutathione on T]", + "S[Custom:Glutathione on S]", "C[Custom:Glutathione on C]", "S[Custom:TCEP on S]", "T[Custom:TCEP on T]", "C[Custom:TCEP on C]" }; + + //string is position accession gene + //dictionary key is mod and int is count + Dictionary> bubba = new Dictionary>(); + using (StreamReader sr = + new StreamReader( + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\modPeptidesMarkovichWithPosition.txt")) + { + bool continueReading = true; + string line; + + while ((line = sr.ReadLine()) != null && continueReading) + { + + string[] fields = line.Split('\t'); + string myKey = fields[0] + "\t" + fields[1] + "\t" + fields[2]; + + bool goodLine = (double.TryParse(fields[5], out double qvalue) && qvalue < 0.01 ); + + if (goodLine) + { + if (bubba.ContainsKey(myKey)) + { + if (bubba[myKey].ContainsKey(fields[4])) + { + bubba[myKey][fields[4]]++; + } + else + { + bubba[myKey].Add(fields[4], 1); + } + } + else + { + bubba.Add(myKey, new Dictionary { { fields[4], 1 } }); + } + } + } + } + + List myOut = new List(); + myOut.Add("position" + "\t" + "protein accession" + "\t" + "gene" + "\t" + String.Join('\t',interestingMods)); + + foreach (var kvp in bubba) + { + string myLine = kvp.Key +"\t"; + foreach (var mod in interestingMods) + { + if (kvp.Value.ContainsKey(mod)) + { + myLine += kvp.Value[mod] + "\t"; + } + else + { + myLine += "0\t"; + } + } + myOut.Add(myLine); + } + + File.WriteAllLines(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\tableAllPeptidesMarkovich.txt", myOut); + + } + + [Test] + public static void Junk6() + { + Dictionary<(int,string,string),List> modPeptidesWithPositionLines = new Dictionary<(int, string, string), List>(); + Dictionary<(int, string, string), List> modPeptidesWithPositionFullSequences = new Dictionary<(int, string, string), List>(); + Dictionary<(int, string, string), List> modPeptidesWithPositionBaseSequences = new Dictionary<(int, string, string), List>(); + Dictionary> basePeptideToFullSequences = new Dictionary>(); + + using (StreamReader sr = + new StreamReader( + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\modPeptides180WithPosition.txt")) + { + bool continueReading = true; + string line; + bool firstLine = true; + + while ((line = sr.ReadLine()) != null && continueReading) + { + if (!firstLine) + { + string[] fields = line.Split('\t'); + string myKey = fields[0] + "\t" + fields[1] + "\t" + fields[2]; + + string baseSequence = fields[3]; + while (baseSequence.Contains("[") && baseSequence.Contains("]")) + { + int firstOpenBracket = baseSequence.IndexOf('['); + int firstCloseBracket = baseSequence.IndexOf(']', firstOpenBracket); + if (firstCloseBracket != -1) + { + baseSequence = baseSequence.Remove(firstOpenBracket, firstCloseBracket - firstOpenBracket + 1); + } + else + { + break; + } + } + + + + + if (modPeptidesWithPositionLines.ContainsKey((int.Parse(fields[0]), fields[1], fields[2]))) + { + modPeptidesWithPositionLines[(int.Parse(fields[0]), fields[1], fields[2])].Add(line); + } + else + { + modPeptidesWithPositionLines.Add((int.Parse(fields[0]), fields[1], fields[2]), new List { line }); + } + + if (modPeptidesWithPositionFullSequences.ContainsKey((int.Parse(fields[0]), fields[1], + fields[2]))) + { + modPeptidesWithPositionFullSequences[(int.Parse(fields[0]), fields[1], fields[2])].Add(fields[3]); + } + else + { + modPeptidesWithPositionFullSequences.Add((int.Parse(fields[0]), fields[1], fields[2]), new List { fields[3] }); + } + + if (modPeptidesWithPositionBaseSequences.ContainsKey((int.Parse(fields[0]), fields[1], + fields[2]))) + { + modPeptidesWithPositionBaseSequences[(int.Parse(fields[0]), fields[1], fields[2])].Add(baseSequence); + } + else + { + modPeptidesWithPositionBaseSequences.Add((int.Parse(fields[0]), fields[1], fields[2]), new List { baseSequence }); + } + + if (basePeptideToFullSequences.ContainsKey(baseSequence)) + { + basePeptideToFullSequences[baseSequence].Add(fields[3]); + } + else + { + basePeptideToFullSequences.Add(baseSequence, new List { fields[3] }); + } + } + firstLine = false; + } + } + + foreach (var kvp in modPeptidesWithPositionBaseSequences) + { + modPeptidesWithPositionBaseSequences[kvp.Key] = kvp.Value.Distinct().ToList(); + } + + foreach (var kvp in basePeptideToFullSequences) + { + basePeptideToFullSequences[kvp.Key] = kvp.Value.Distinct().ToList(); + } + } + + [Test] + public static void Junk7() + { + Dictionary<(string, string), string> baseSequencefullSequencQvaluePepQvalueforPSMs = new Dictionary<(string, string), string>(); + + using (StreamReader sr = new StreamReader(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllPSMsMarkovich.psmtsv")) + { + bool continueReading = true; + string line; + while ((line = sr.ReadLine()) != null && continueReading) + { + string[] fields = line.Split('\t'); + string baseSequence = fields[12]; + string fullSequence = fields[13]; + string targetDecoyContam = fields[38]; + string qValue = fields[50]; + string pepQValue = fields[55]; + if (targetDecoyContam == "T" && double.TryParse(qValue, out double qValueDouble)) + { + if (qValueDouble < 0.01 && !baseSequencefullSequencQvaluePepQvalueforPSMs.ContainsKey((baseSequence, fullSequence))) + { + baseSequencefullSequencQvaluePepQvalueforPSMs.Add((baseSequence, fullSequence), qValue + "\t" + pepQValue); + } + + } + + if (double.TryParse(qValue, out double qValueDouble2)) + { + if (qValueDouble2 > 0.01) + { + continueReading = false; + } + } + } + } + + string psmFilePath = + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\AllPeptidesMarkovich.psmtsv"; + List parsedPeptides = SpectrumMatchTsvReader.ReadPsmTsv(psmFilePath, out var warnings); + parsedPeptides = parsedPeptides.Where(p => p.DecoyContamTarget.Contains("T")).ToList(); + + List interestingMods = new List { "S[Common Biological:Phosphorylation on S]", "T[Common Biological:Phosphorylation on T]", "S[Less Common:Dehydroalanine on S]", "C[Less Common:Dehydroalanine on C]", + "S[Custom:Homocys on S]", "C[Custom:Homocys on C]", "T[Custom:Homocys on T]", "T[Less Common:Dehydrobutyrine on T]", "S[Custom:DTT on S]", "C[Custom:DTT on C]", "C[Custom:DTT on T]", "T[Custom:Glutathione on T]", + "S[Custom:Glutathione on S]", "C[Custom:Glutathione on C]", "S[Custom:TCEP on S]", "T[Custom:TCEP on T]", "C[Custom:TCEP on C]" }; + + Dictionary<(string, int), List> genePositionPeptides = new Dictionary<(string, int), List>(); + Dictionary<(string, int), List> genePositionMod = new Dictionary<(string, int), List>(); + Dictionary> noInterestingModsPeptideValues = new Dictionary>(); + + foreach (var peptide in parsedPeptides) + { + if (baseSequencefullSequencQvaluePepQvalueforPSMs.ContainsKey((peptide.BaseSeq, peptide.FullSequence))) + { + List foundMods = interestingMods.Where(s => peptide.FullSequence.Contains(s)).ToList(); + if (foundMods.Any()) + { + foreach (var mod in foundMods) + { + string sequence = peptide.FullSequence; + string firstCharacter = mod.Substring(0, 1); + if (firstCharacter == "Y" || firstCharacter == "S" || firstCharacter == "T" || + firstCharacter == "C") + { + sequence = sequence.Replace(mod, firstCharacter.ToLowerInvariant()); + } + + sequence = sequence.Replace("[I]", ""); + sequence = sequence.Replace("[II]", ""); + sequence = sequence.Replace("[III]", ""); + + //eliminate the remaning mods + while (sequence.Contains("[") && sequence.Contains("]")) + { + int firstOpenBracket = sequence.IndexOf('['); + int firstCloseBracket = sequence.IndexOf(']', firstOpenBracket); + if (firstCloseBracket != -1) + { + sequence = sequence.Remove(firstOpenBracket, firstCloseBracket - firstOpenBracket + 1); + } + else + { + break; + } + } + + List lowercaseLetterPositions = new List(); + + for (int i = 0; i < sequence.Length; i++) + { + if (char.IsLower(sequence[i])) + { + lowercaseLetterPositions.Add(i); + } + } + + string firstAndLastAminoAcidPositionInProtein = + peptide.StartAndEndResiduesInProtein.Split('|')[0]; + firstAndLastAminoAcidPositionInProtein = + firstAndLastAminoAcidPositionInProtein.Substring(1, + firstAndLastAminoAcidPositionInProtein.Length - 2); + firstAndLastAminoAcidPositionInProtein = + firstAndLastAminoAcidPositionInProtein.Replace(" to ", "\t"); + int[] startEnd = firstAndLastAminoAcidPositionInProtein.Split('\t').Select(int.Parse) + .ToArray(); + lowercaseLetterPositions = lowercaseLetterPositions.Select(s => s + startEnd[0]).ToList(); + + string allGenesInPsm = peptide.GeneName; + string firstGene = ""; + if (allGenesInPsm.Contains("|")) + { + string[] genes = allGenesInPsm.Split('|'); + + if (genes[0].Contains(":")) + { + firstGene = genes[0].Split(':')[1]; + + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPeptides.ContainsKey((firstGene, position))) + { + genePositionPeptides[(firstGene, position)].Add(peptide); + genePositionMod[(firstGene, position)] + .Add(mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue); + + } + else + { + genePositionPeptides.Add((firstGene, position), new List { peptide }); + genePositionMod.Add((firstGene, position), + new List { mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue }); + } + } + } + else + { + firstGene = genes[0]; + if (genes[0].Contains(":")) + { + firstGene = genes[0].Split(':')[1]; + } + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPeptides.ContainsKey((firstGene, position))) + { + genePositionPeptides[(firstGene, position)].Add(peptide); + genePositionMod[(firstGene, position)] + .Add(mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue); + + } + else + { + genePositionPeptides.Add((firstGene, position), new List { peptide }); + genePositionMod.Add((firstGene, position), + new List { mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue }); + } + } + } + } + else + { + firstGene = peptide.GeneName; + if (peptide.GeneName.Contains(":")) + { + firstGene = peptide.GeneName.Split(':')[1]; + } + + foreach (int position in lowercaseLetterPositions) + { + if (genePositionPeptides.ContainsKey((firstGene, position))) + { + genePositionPeptides[(firstGene, position)].Add(peptide); + genePositionMod[(firstGene, position)] + .Add(mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue); + + } + else + { + genePositionPeptides.Add((firstGene, position), new List { peptide }); + genePositionMod.Add((firstGene, position), + new List { mod + "\t" + peptide.QValue + "\t" + peptide.PEP_QValue }); + } + } + } + } + } + else + { + if (noInterestingModsPeptideValues.ContainsKey(peptide.BaseSeq)) + { + noInterestingModsPeptideValues[peptide.BaseSeq].Add(peptide); + } + else + { + noInterestingModsPeptideValues.Add(peptide.BaseSeq, new List { peptide }); + } + + } + } + } + + List<(string,string)> outList = new List<(string, string)>(); + List myOut = new List(); + myOut.Add("position" + "\t" + "protein accession" + "\t" + "gene" + "\t" + "base sequence" + "\t" + "full sequence" + "\t" + "modification" + "\t" + "Peptide Q-value" + "\t" + "Peptide PEP Q-Value" + "\t" + "PSM Q-value" + "\t" + "PSM PEP Q-Value"); + + foreach (var kvp in genePositionPeptides) + { + if (kvp.Value.Count > 1) + { + foreach (var psm in kvp.Value) + { + int index = kvp.Value.IndexOf(psm); + if (baseSequencefullSequencQvaluePepQvalueforPSMs.ContainsKey((psm.BaseSeq, psm.FullSequence))) + { + myOut.Add(kvp.Key.Item2 + "\t" + psm.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm.BaseSeq + "\t" + psm.FullSequence + "\t" + genePositionMod[kvp.Key][index] + "\t" + baseSequencefullSequencQvaluePepQvalueforPSMs[(psm.BaseSeq, psm.FullSequence)]); + outList.Add((psm.FullSequence, kvp.Key.Item2 + "\t" + psm.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm.BaseSeq + "\t" + psm.FullSequence + "\t" + genePositionMod[kvp.Key][index] + "\t" + baseSequencefullSequencQvaluePepQvalueforPSMs[(psm.BaseSeq, psm.FullSequence)])); + if (noInterestingModsPeptideValues.ContainsKey(psm.BaseSeq)) + { + foreach (var psm2 in noInterestingModsPeptideValues[psm.BaseSeq]) + { + myOut.Add(kvp.Key.Item2 + "\t" + psm2.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm2.BaseSeq + "\t" + psm2.FullSequence + "\t" + "No Modifications of Interest" + "\t" + psm2.QValue + "\t" + psm2.PEP_QValue + "\t" + baseSequencefullSequencQvaluePepQvalueforPSMs[(psm2.BaseSeq, psm2.FullSequence)]); + outList.Add((psm2.FullSequence, kvp.Key.Item2 + "\t" + psm2.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm2.BaseSeq + "\t" + psm2.FullSequence + "\t" + "No Modifications of Interest" + "\t" + psm2.QValue + "\t" + psm2.PEP_QValue + "\t" + baseSequencefullSequencQvaluePepQvalueforPSMs[(psm2.BaseSeq, psm2.FullSequence)])); + } + noInterestingModsPeptideValues.Remove(psm.BaseSeq); + } + } + else + { + //myOut.Add(kvp.Key.Item2 + "\t" + psm.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm.BaseSeq + "\t" + psm.FullSequence + "\t" + genePositionMod[kvp.Key][index]); + + //if (noInterestingModsPeptideValues.ContainsKey(psm.BaseSeq)) + //{ + // foreach (var psm2 in noInterestingModsPeptideValues[psm.BaseSeq]) + // { + // string psmQvalues = baseSequencefullSequencQvaluePepQvalueforPSMs[(psm2.BaseSeq, psm2.FullSequence)]; + // myOut.Add(kvp.Key.Item2 + "\t" + psm2.ProteinAccession + "\t" + kvp.Key.Item1 + "\t" + psm2.BaseSeq + "\t" + psm2.FullSequence + "\t" + "No Modifications of Interest" + "\t" + psm2.QValue + "\t" + psm2.PEP_QValue + "\t" + psmQvalues); + + // } + // noInterestingModsPeptideValues.Remove(psm.BaseSeq); + //} + } + + } + } + } + + Dictionary quantDict = new Dictionary(); + using (StreamReader sr = + new StreamReader( + @"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\QuantifiedPeptidesNormalizedMarkovich.tsv")) + { + string line; + while ((line = sr.ReadLine()) != null) + { + string[] fields = line.Split('\t'); + string fullSequence = fields[0]; + if (!quantDict.ContainsKey(fullSequence)) + { + quantDict.Add(fullSequence, line); + } + } + } + + List newOutList = new List(); + newOutList.Add(myOut[0] + "\t" + quantDict["Sequence"]); + foreach (var kvp in outList) + { + if (quantDict.ContainsKey(kvp.Item1)) + { + newOutList.Add(kvp.Item2 + "\t" + quantDict[kvp.Item1]); + } + else + { + newOutList.Add(kvp.Item2); + } + } + + File.WriteAllLines(@"C:\Users\Michael Shortreed\Downloads\4-26-24 AD Brain DHAA Analysis\modPeptidesMarkovichWithPositionAndQuant_new.txt", newOutList); + } + + [Test] public void SpectrumCount() {