diff --git a/Ilmn.Das.App.Wittyer.Test/Ilmn.Das.App.Wittyer.Test.csproj b/Ilmn.Das.App.Wittyer.Test/Ilmn.Das.App.Wittyer.Test.csproj index 8133b6c..9d1f418 100644 --- a/Ilmn.Das.App.Wittyer.Test/Ilmn.Das.App.Wittyer.Test.csproj +++ b/Ilmn.Das.App.Wittyer.Test/Ilmn.Das.App.Wittyer.Test.csproj @@ -50,6 +50,30 @@ Always + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/bed.bed b/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/bed.bed new file mode 100644 index 0000000..69d6ccf --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/bed.bed @@ -0,0 +1,3 @@ +chr1 40000000 80500000 +chr2 40000000 80500000 +chr2 85000000 95510000 diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/query.vcf b/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/query.vcf index ff9990d..310fadb 100644 --- a/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/query.vcf +++ b/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/query.vcf @@ -4,4 +4,5 @@ chr1 50000001 . N 10 PASS END=50064000;SVTYPE=DUP GT:CN 0/1:1 chr1 60000001 . N 10 PASS END=60128000;SVTYPE=DUP GT:CN 1/1:4 chr1 70000001 . N 10 PASS END=70250000;SVTYPE=DUP GT:CN 0/0:2 -chr1 80000001 . N 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3 +chr2 80000001 . N 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3 +chr2 90000001 . N 10 PASS END=90510000;SVTYPE=DUP GT:CN 0/1:3 diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/truth.vcf b/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/truth.vcf index d7bcdb3..1c59a94 100644 --- a/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/truth.vcf +++ b/Ilmn.Das.App.Wittyer.Test/Resources/Tiny/truth.vcf @@ -4,4 +4,5 @@ chr1 50000001 . N 10 PASS END=50064000;SVTYPE=DUP GT:CN 0/1:1 chr1 60000001 . N 10 PASS END=60128000;SVTYPE=DUP GT:CN 1/1:3 chr1 70000001 . N 10 PASS END=70250000;SVTYPE=DUP GT:CN 0/0:2 -chr1 80000001 . N 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3 +chr2 80000001 . N 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3 +chr2 90000001 . 
N 10 PASS END=90510000;SVTYPE=DUP GT:CN 0/1:3 diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/bed.bed b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/bed.bed new file mode 100644 index 0000000..49ea0ff --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/bed.bed @@ -0,0 +1 @@ +chr1 145600000 146000000 diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/config.json b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/config.json new file mode 100755 index 0000000..95b6510 --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/config.json @@ -0,0 +1,29 @@ +[ + { + "variantType": "Duplication", + "binSizes": "1000,5000,10000,20000,50000", + "bpDistance": 10000, + "percentDistance": 0.25, + "includedFilters": "PASS", + "excludedFilters": "", + "includeBed": "" + }, + { + "variantType": "Deletion", + "binSizes": "1000,5000,10000,20000,50000", + "bpDistance": 10000, + "percentDistance": 0.25, + "includedFilters": "PASS", + "excludedFilters": "", + "includeBed": "" + }, + { + "variantType": "CopyNumberReference", + "binSizes": "1000,5000,10000,20000,50000", + "bpDistance": 10000, + "percentDistance": 0.25, + "includedFilters": "PASS", + "excludedFilters": "", + "includeBed": "" + } +] diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/query.vcf b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/query.vcf new file mode 100644 index 0000000..8541ed0 --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/query.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.2 +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT QUERY +chr1 145518989 . 
N 150 PASS END=145809108;REFLEN=290119;SVLEN=-290119;SVTYPE=CNV GT:SM:CN:BC:PE 0/1:0.507438:1:110:28,0 diff --git a/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/truth.vcf b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/truth.vcf new file mode 100644 index 0000000..9c559a2 --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/truth.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.2 +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TRUTH +chr1 145602105 . N 150 PASS END=145808783;REFLEN=206678;SVTYPE=DEL; GT:CN 0/1:1 diff --git a/Ilmn.Das.App.Wittyer.Test/SkipBinsTest.cs b/Ilmn.Das.App.Wittyer.Test/SkipBinsTest.cs index 1bf8222..1c76bb1 100644 --- a/Ilmn.Das.App.Wittyer.Test/SkipBinsTest.cs +++ b/Ilmn.Das.App.Wittyer.Test/SkipBinsTest.cs @@ -7,6 +7,7 @@ using Ilmn.Das.App.Wittyer.Vcf.Variants; using Ilmn.Das.Core.Tries.Extensions; using Ilmn.Das.Std.AppUtils.Misc; +using Ilmn.Das.Std.XunitUtils; using Xunit; namespace Ilmn.Das.App.Wittyer.Test @@ -40,12 +41,13 @@ public void SkippedBinsAreIgnoredInStats() var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First(); var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs); - Assert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.TrueCount); - Assert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.FalseCount); - Assert.Equal(0.5, results.EventLevelRecallOverall.First(typeRecallTuple => typeRecallTuple.type == WittyerType.CopyNumberGain).recall); + MultiAssert.Equal(2U, results.OverallStats[StatsType.Event].QueryStats.TrueCount); + MultiAssert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.FalseCount); + MultiAssert.Equal(0.6666666666666666, results.EventLevelRecallOverall.First(typeRecallTuple => typeRecallTuple.type == WittyerType.CopyNumberGain).recall); var numberOfBinsReportedOn = results.EventLevelRecallPerBin.First().perBinRecall.Count(); - Assert.Equal(2, numberOfBinsReportedOn); + MultiAssert.Equal(2, 
numberOfBinsReportedOn); + MultiAssert.AssertAll(); } } } \ No newline at end of file diff --git a/Ilmn.Das.App.Wittyer.Test/TinyTest.cs b/Ilmn.Das.App.Wittyer.Test/TinyTest.cs index 130ebcc..6e3bfab 100644 --- a/Ilmn.Das.App.Wittyer.Test/TinyTest.cs +++ b/Ilmn.Das.App.Wittyer.Test/TinyTest.cs @@ -1,11 +1,14 @@ -using System.Collections.Immutable; +using System.Collections.Generic; +using System.Collections.Immutable; using System.IO; using System.Linq; using Ilmn.Das.App.Wittyer.Infrastructure; using Ilmn.Das.App.Wittyer.Input; using Ilmn.Das.App.Wittyer.Stats; +using Ilmn.Das.App.Wittyer.Vcf.Variants; using Ilmn.Das.Core.Tries.Extensions; using Ilmn.Das.Std.AppUtils.Misc; +using Ilmn.Das.Std.XunitUtils; using Xunit; namespace Ilmn.Das.App.Wittyer.Test @@ -17,6 +20,9 @@ public class TinyTest private static readonly FileInfo TinyQuery = Path.Combine("Resources", "Tiny", "query.vcf").ToFileInfo(); + + private static readonly FileInfo TinyBed = + Path.Combine("Resources", "Tiny", "bed.bed").ToFileInfo(); [Fact] public void CrossType_Works() @@ -31,7 +37,32 @@ public void CrossType_Works() var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First(); var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs); - Assert.Equal(4U, results.OverallStats[StatsType.Event].QueryStats.TrueCount); + MultiAssert.Equal(5U, results.OverallStats[StatsType.Event].QueryStats.TrueCount); + MultiAssert.Equal(1461995U, results.OverallStats[StatsType.Base].QueryStats.TrueCount); + MultiAssert.Equal(1461995U, results.OverallStats[StatsType.Base].TruthStats.TrueCount); + MultiAssert.AssertAll(); + } + + [Fact] + public void CrossType_Bases_Works() + { + var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo(); + var inputSpecs = InputSpec.GenerateDefaultInputSpecs(false) + .Select(i => InputSpec.Create(i.VariantType, i.BinSizes, + 10000, i.PercentDistance, i.ExcludedFilters, i.IncludedFilters, + 
IncludeBedFile.CreateFromBedFile(TinyBed))) + .ToDictionary(i => i.VariantType, i => i); + var wittyerSettings = WittyerSettings.Create(outputDirectory, TinyTruth, TinyQuery, + ImmutableList.Empty, EvaluationMode.CrossTypeAndSimpleCounting, + inputSpecs); + + var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings) + .EnumerateSuccesses().First(); + var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs); + MultiAssert.Equal(4U, results.OverallStats[StatsType.Event].QueryStats.TrueCount); + MultiAssert.Equal(1451995U, results.OverallStats[StatsType.Base].QueryStats.TrueCount); + MultiAssert.Equal(1451995U, results.OverallStats[StatsType.Base].TruthStats.TrueCount); + MultiAssert.AssertAll(); } } } \ No newline at end of file diff --git a/Ilmn.Das.App.Wittyer.Test/Wit149.cs b/Ilmn.Das.App.Wittyer.Test/Wit149.cs new file mode 100644 index 0000000..3019e1b --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Wit149.cs @@ -0,0 +1,46 @@ +using System.Collections.Immutable; +using System.IO; +using System.Linq; +using Ilmn.Das.App.Wittyer.Infrastructure; +using Ilmn.Das.App.Wittyer.Input; +using Ilmn.Das.App.Wittyer.Stats; +using Ilmn.Das.Core.Tries.Extensions; +using Ilmn.Das.Std.AppUtils.Misc; +using Ilmn.Das.Std.XunitUtils; +using Xunit; + +namespace Ilmn.Das.App.Wittyer.Test +{ + public class Wit149 + { + private static readonly FileInfo Truth = + Path.Combine("Resources", "WIT-149", "truth.vcf").ToFileInfo(); + + private static readonly FileInfo Query = + Path.Combine("Resources", "WIT-149", "query.vcf").ToFileInfo(); + + private static readonly FileInfo Bed = + Path.Combine("Resources", "WIT-149", "bed.bed").ToFileInfo(); + + [Fact] + public void WholeChrIncluded() + { + var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo(); + var inputSpecs = InputSpec.GenerateDefaultInputSpecs(false).Select(i => InputSpec.Create(i.VariantType, i.BinSizes, + 10000, i.PercentDistance, i.ExcludedFilters, i.IncludedFilters, 
IncludeBedFile.CreateFromBedFile(Bed))) + .ToDictionary(i => i.VariantType, i => i); + var wittyerSettings = WittyerSettings.Create(outputDirectory, Truth, Query, + ImmutableList.Empty, EvaluationMode.CrossTypeAndSimpleCounting, + inputSpecs); + + var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First(); + var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs); + // should be end of bed - (start of query + 1 for padded base) = 135086622 - 1 = 135086621 + MultiAssert.Equal(135086621U, results.OverallStats[StatsType.Base].QueryStats.TrueCount); + MultiAssert.Equal(135086621U, results.OverallStats[StatsType.Base].TruthStats.TrueCount); + MultiAssert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.TrueCount); + MultiAssert.Equal(1U, results.OverallStats[StatsType.Event].TruthStats.TrueCount); + MultiAssert.AssertAll(); + } + } +} \ No newline at end of file diff --git a/Ilmn.Das.App.Wittyer.Test/Wit154.cs b/Ilmn.Das.App.Wittyer.Test/Wit154.cs new file mode 100644 index 0000000..14c0c3a --- /dev/null +++ b/Ilmn.Das.App.Wittyer.Test/Wit154.cs @@ -0,0 +1,52 @@ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.IO; +using System.Linq; +using Ilmn.Das.App.Wittyer.Infrastructure; +using Ilmn.Das.App.Wittyer.Input; +using Ilmn.Das.App.Wittyer.Stats; +using Ilmn.Das.App.Wittyer.Vcf.Variants; +using Ilmn.Das.Core.Tries.Extensions; +using Ilmn.Das.Std.AppUtils.Misc; +using Ilmn.Das.Std.XunitUtils; +using Xunit; + +namespace Ilmn.Das.App.Wittyer.Test +{ + public class Wit154 + { + private static readonly FileInfo Truth = + Path.Combine("Resources", "WIT-154", "truth.vcf").ToFileInfo(); + + private static readonly FileInfo Query = + Path.Combine("Resources", "WIT-154", "query.vcf").ToFileInfo(); + + private static readonly FileInfo Bed = + Path.Combine("Resources", "WIT-154", "bed.bed").ToFileInfo(); + + private static readonly FileInfo Config = + 
Path.Combine("Resources", "WIT-154", "config.json").ToFileInfo(); + + [Fact] + public void CrossType_ComplexBed_Works() + { + var outputDirectory = Path.GetRandomFileName().ToDirectoryInfo(); + var inputSpecs = InputSpec.CreateSpecsFromString( + File.ReadAllText(Config.FullName), IncludeBedFile.CreateFromBedFile(Bed)) + ?.ToDictionary(i => i.VariantType, i => i) + ?? new Dictionary(); + var wittyerSettings = WittyerSettings.Create(outputDirectory, Truth, Query, + ImmutableList.Empty, EvaluationMode.CrossTypeAndSimpleCounting, + inputSpecs); + + var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings) + .EnumerateSuccesses().First(); + var results = MainLauncher + .GenerateSampleMetrics(truth, query, false, inputSpecs); + var baseStats = results.DetailedStats[WittyerType.Deletion].OverallStats[StatsType.Base]; + MultiAssert.Equal(206678U, baseStats.QueryStats.TrueCount); + MultiAssert.Equal(206678U, baseStats.TruthStats.TrueCount); + MultiAssert.AssertAll(); + } + } +} \ No newline at end of file diff --git a/Ilmn.Das.App.Wittyer/Infrastructure/Quantify.cs b/Ilmn.Das.App.Wittyer/Infrastructure/Quantify.cs index 0f8df8a..5f5fc4d 100644 --- a/Ilmn.Das.App.Wittyer/Infrastructure/Quantify.cs +++ b/Ilmn.Das.App.Wittyer/Infrastructure/Quantify.cs @@ -145,8 +145,7 @@ private static (IBasicStatsCount overallBaseStats, var typeTotalTpTrees = perTypeTotalTpDictionary.GetOrAdd(type, _ => new ConcurrentDictionary>()); - foreach (var binGroup in variants.Where(it => // must be Assessed - it.Sample.Wit != WitDecision.NotAssessed) + foreach (var binGroup in variants .GroupBy(v => v.Win.Start)) { var binStart = binGroup.Key; @@ -168,10 +167,15 @@ private static (IBasicStatsCount overallBaseStats, eventStats.AddTrueEvent(); else if (variant.Sample.Wit == falseDecision) eventStats.AddFalseEvent(); - else if (variant.Sample.Wit != WitDecision.NotAssessed) + else if (variant.Sample.Wit == WitDecision.NotAssessed) + { + // purposely empty, don't do anything on this type but 
we need to keep this type here + // for base stats. + } + else throw new InvalidDataException( - $"Unexpected {nameof(WitDecision)} value ({variant.Sample.Wit}) for variant: " + - variant.OriginalVariant.ToShortString()); + $"Unexpected {nameof(WitDecision)} value ({variant.Sample.Wit}) for variant: " + + variant.OriginalVariant.ToShortString()); if (!type.HasBaseLevelStats) continue; @@ -243,23 +247,6 @@ private static (IBasicStatsCount overallBaseStats, stats.AddFalseCount(chr, interval); } - - ////This is here as sanity check code, we can remove later if we want. - - //if (stats == null) // means TotalTree has no intervals - // continue; - - //// eventually get rid of this by replacing with actually keeping track of totals in the stats - //// and after outputting stats, we should do sanity check and crash if not equal. - //var fpTotal = stats.FalseCount.Select(kvp => kvp.Value.GetTotalMergedLength()).Sum(); - //var tpTotal = stats.TrueCount.Select(kvp => kvp.Value.GetTotalMergedLength()).Sum(); - //var expectedTotal = perBinTotalDictionary[binGroup.Key] - // .Select(kvp => kvp.Value.GetTotalMergedLength()).Sum(); - //var actualTotal = fpTotal + tpTotal; - - //if (actualTotal != expectedTotal) - // throw new InvalidDataException( - // $"Expected total bases to be {expectedTotal}, but got {actualTotal}!"); } } diff --git a/Ilmn.Das.App.Wittyer/Input/IncludeBedFile.cs b/Ilmn.Das.App.Wittyer/Input/IncludeBedFile.cs index ee01621..38e2f33 100644 --- a/Ilmn.Das.App.Wittyer/Input/IncludeBedFile.cs +++ b/Ilmn.Das.App.Wittyer/Input/IncludeBedFile.cs @@ -24,7 +24,9 @@ public class IncludeBedFile /// /// The IntervalTree from this bed file. /// - [NotNull] public readonly GenomeIntervalTree IntervalTree; + [NotNull] + public GenomeIntervalTree IntervalTree => _intervalTree.Value; + [NotNull] private readonly Lazy> _intervalTree; /// /// The bed file associated with this instance. If created from , this will write out a bed file. 
@@ -32,10 +34,10 @@ public class IncludeBedFile [NotNull] public FileInfo BedFile => _fileSource.Value; private readonly Lazy _fileSource; - private IncludeBedFile([NotNull] GenomeIntervalTree tree, + private IncludeBedFile([NotNull] Lazy> tree, [NotNull] Lazy fileSource) { - IntervalTree = tree; + _intervalTree = tree; _fileSource = fileSource; } @@ -56,7 +58,8 @@ public static IncludeBedFile CreateFromContigIntervals( { var tree = contigIntervals as GenomeIntervalTree ?? CreateGenomeIntervalTree(contigIntervals); - return new IncludeBedFile(tree, CreateBedFileLazy(tree)); + return new IncludeBedFile(new Lazy>(tree), + CreateBedFileLazy(tree)); Lazy CreateBedFileLazy( IEnumerable thisTree) @@ -93,19 +96,29 @@ private static GenomeIntervalTree CreateGenomeIntervalTree( var listOrder = new List(); foreach (var contigInterval in contigIntervals) { - if (!dictionary.TryGetValue(contigInterval.Contig, out var tree)) + var contig = contigInterval.Contig; + if (!dictionary.TryGetValue(contig, out var tree)) { tree = MergedIntervalTree.Create(null); - listOrder.Add(contigInterval.Contig); - dictionary.Add(contigInterval.Contig, tree); + listOrder.Add(contig); + dictionary.Add(contig, tree); } tree.Add(contigInterval); } var ret = GenomeIntervalTree.Create(); foreach (var contig in listOrder) + { ret.AddRange(dictionary[contig] - .Select(i => i as IContigAndInterval ?? ContigAndInterval.Create(contig, i.Start, i.Stop))); + .Select(i => i as IContigAndInterval + ?? 
ContigAndInterval.Create(contig, i.Start, i.Stop))); + var other = contig.ToUcscStyle(); + if (other.Name == contig.Name) + other = contig.ToGrchStyle(); + if (other.Name != contig.Name) + ret.AddRange(dictionary[contig] + .Select(i => ContigAndInterval.Create(other, i.Start, i.Stop))); + } return ret; } @@ -118,7 +131,10 @@ private static GenomeIntervalTree CreateGenomeIntervalTree( [NotNull] [Pure] public static IncludeBedFile CreateFromBedFile([NotNull] FileInfo bedFile) - => CreateFromBedReader(BedReader.Create(bedFile)); + => bedFile.ExistsNow() + ? CreateFromBedReader(BedReader.Create(bedFile)) + : TypeCache.GetOrAdd(bedFile.FullName, + () => CreateFromBedReader(BedReader.Create(bedFile))); /// /// Creates a new instance of from a . @@ -128,7 +144,8 @@ public static IncludeBedFile CreateFromBedFile([NotNull] FileInfo bedFile) [Pure] public static IncludeBedFile CreateFromBedReader([NotNull] BedReader bedReader) => TypeCache.GetOrAdd(bedReader.FileSource.GetCompleteRealPath().FullName, () => - new IncludeBedFile(CreateGenomeIntervalTree(bedReader), + new IncludeBedFile(new Lazy>( + () => CreateGenomeIntervalTree(bedReader)), new Lazy(() => bedReader.FileSource))); /// diff --git a/Ilmn.Das.App.Wittyer/Input/InputParseUtils.cs b/Ilmn.Das.App.Wittyer/Input/InputParseUtils.cs index 58d7f7c..e0dda91 100644 --- a/Ilmn.Das.App.Wittyer/Input/InputParseUtils.cs +++ b/Ilmn.Das.App.Wittyer/Input/InputParseUtils.cs @@ -98,8 +98,6 @@ internal static IncludeBedFile ParseBedFile([CanBeNull] string filePath) if (string.IsNullOrWhiteSpace(filePath)) return null; var file = filePath.ToFileInfo(); - if (!file.ExistsNow()) - throw new FileNotFoundException($"{filePath} not found!"); return IncludeBedFile.CreateFromBedFile(file); } } diff --git a/Ilmn.Das.App.Wittyer/Input/InputSpec.cs b/Ilmn.Das.App.Wittyer/Input/InputSpec.cs index 79810f3..93f68fb 100644 --- a/Ilmn.Das.App.Wittyer/Input/InputSpec.cs +++ b/Ilmn.Das.App.Wittyer/Input/InputSpec.cs @@ -182,6 +182,27 @@ public 
static IEnumerable GenerateCustomInputSpecs(bool isCrossTypeOf includedFilters ?? WittyerConstants.DefaultIncludeFilters, bedFile)); + /// + /// Creates a new instance of with a new value for the + /// + /// The new , can be null. + [NotNull] + [Pure] + public InputSpec ReplaceBedFile([CanBeNull] IncludeBedFile bedFile) + => Create(VariantType, BinSizes, BasepairDistance, PercentDistance, ExcludedFilters, + IncludedFilters, bedFile); + + /// + /// Creates an IEnumerable of s with a possible override of the + /// + [CanBeNull] + [Pure] + public static IEnumerable CreateSpecsFromString( + string configText, [CanBeNull] IncludeBedFile bedFileOverride) + => JsonConvert + .DeserializeObject>(configText, InputSpecConverter.Create()) + ?.Select(x => bedFileOverride == null ? x : x.ReplaceBedFile(bedFileOverride)); + [NotNull] private static IReadOnlyCollection VerifyFiltersAndGetFinalIncluded( [NotNull] IReadOnlyCollection excludedFilters, [CanBeNull] IReadOnlyCollection includedFilters) diff --git a/Ilmn.Das.App.Wittyer/Input/WittyerSettings.cs b/Ilmn.Das.App.Wittyer/Input/WittyerSettings.cs index fd02a8f..1e078ed 100644 --- a/Ilmn.Das.App.Wittyer/Input/WittyerSettings.cs +++ b/Ilmn.Das.App.Wittyer/Input/WittyerSettings.cs @@ -165,11 +165,12 @@ internal static IWittyerSettings ParsePrivate(string[] args) if (parameters._configFile.IsArgumentAssigned) { - if (parameters._configOptions.Any(x => x.IsArgumentAssigned)) + if (parameters._configOptions.Any(x => x.IsArgumentAssigned + && x != parameters._bedFile)) { Console.Error.WriteLine( "Config file argument cannot be used in combination with arguments for bin sizes, basepair distance, " + - "percent distance, included filters, excluded filters, variant types, or include bed. Exiting."); + "percent distance, included filters, excluded filters, or variant types. 
Exiting."); Environment.Exit(1); } @@ -190,8 +191,8 @@ internal static IWittyerSettings ParsePrivate(string[] args) Environment.Exit(1); } - parameters.InputSpecs = JsonConvert - .DeserializeObject>(configText, InputSpecConverter.Create()) + var bedFile = parameters._bedFile.IsArgumentAssigned ? parameters._bedFile.Argument : null; + parameters.InputSpecs = InputSpec.CreateSpecsFromString(configText, bedFile) .ToImmutableDictionary(x => x.VariantType, x => x); } else diff --git a/Ilmn.Das.App.Wittyer/Json/JsonConverters/InputSpecConverter.cs b/Ilmn.Das.App.Wittyer/Json/JsonConverters/InputSpecConverter.cs index 1595f15..cc9dcf9 100644 --- a/Ilmn.Das.App.Wittyer/Json/JsonConverters/InputSpecConverter.cs +++ b/Ilmn.Das.App.Wittyer/Json/JsonConverters/InputSpecConverter.cs @@ -84,7 +84,7 @@ void VerifySettings() throw new JsonSerializationException($"Duplicate variant type '{variantType}' in the config file."); var expectedFieldNames = variantTypeEnum.HasBins - ? (variantTypeEnum.HasLengths ? AllFieldNames : InsFieldNames) + ? variantTypeEnum.HasLengths ? 
AllFieldNames : InsFieldNames : TraFieldNames; var missingFields = expectedFieldNames.Except(fieldNames); diff --git a/Ilmn.Das.App.Wittyer/Utilities/WittyerUtils.cs b/Ilmn.Das.App.Wittyer/Utilities/WittyerUtils.cs index c826182..74b5d33 100644 --- a/Ilmn.Das.App.Wittyer/Utilities/WittyerUtils.cs +++ b/Ilmn.Das.App.Wittyer/Utilities/WittyerUtils.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Collections.Immutable; using System.IO; @@ -311,4 +311,4 @@ public static double GetFscore([NotNull] this IStatsUnit stats) public static uint GetTotal([NotNull] this IBasicStatsCount stats) => stats.TrueCount + stats.FalseCount; } -} +} \ No newline at end of file diff --git a/Ilmn.Das.App.Wittyer/Vcf/Variants/WittyerVariantInternal.cs b/Ilmn.Das.App.Wittyer/Vcf/Variants/WittyerVariantInternal.cs index 455b4c7..377a47f 100644 --- a/Ilmn.Das.App.Wittyer/Vcf/Variants/WittyerVariantInternal.cs +++ b/Ilmn.Das.App.Wittyer/Vcf/Variants/WittyerVariantInternal.cs @@ -117,7 +117,7 @@ public void Finalize(WitDecision falseDecision, EvaluationMode mode, if (includedRegions.TryGetValue(Contig, out var tree)) { var startPosition = CiPosInterval.Stop - 1; - var endPosition = CiEndInterval.Start + 1; + var endPosition = CiEndInterval.Start; if (startPosition >= endPosition) // means cipos or ciend goes past each other // so any overlap with Start to Stop should mean included. diff --git a/docs/examples/CNV/README.md b/docs/examples/CNV/README.md index 5e72b27..e303f89 100644 --- a/docs/examples/CNV/README.md +++ b/docs/examples/CNV/README.md @@ -2,9 +2,11 @@ In this folder, you will find a dragen CNV output file ([HG002.cnv.vcf.gz](HG002.cnv.vcf.gz)) and a [config.json](config.json), which will be used as input. Example output can be found in the [output folder](output). -1. 
Download the NIST truth set [VCF file](ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz) and [BED file](ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.bed) -2. Install dotnet if you don't already have it installed on your system (you can google directions on how to install based on your system). -3. Build witty.er with the `dotnet publish` command under the Wittyer project folder +1. Download the NIST truth set: + 1. VCF file: ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz + 2. BED file: ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.bed +2. Install `dotnet` if you don't already have it installed on your system (you can google directions on how to install based on your system). +3. Build `witty.er` with the `dotnet publish` command under the `Wittyer` project folder 4. Run the following command line: ```bash diff --git a/docs/examples/README.md b/docs/examples/README.md index ac1171b..14fc864 100644 --- a/docs/examples/README.md +++ b/docs/examples/README.md @@ -4,5 +4,5 @@ These folders contain example output files from dragen evaluated against the NIS You can follow the instructions in the following `README`s to reproduce the examples. -* [CNV Example information](CNV/README.md) -* [SV Example information](SV/README.md) +* [CNV Example information](CNV) +* [SV Example information](SV) diff --git a/docs/examples/SV/README.md b/docs/examples/SV/README.md index 17e5c47..32655c8 100644 --- a/docs/examples/SV/README.md +++ b/docs/examples/SV/README.md @@ -2,9 +2,11 @@ In this folder, you will find a dragen SV output file ([HG002.sv.vcf.gz](HG002.sv.vcf.gz)) and a [config.json](config.json), which will be used as input. Example output can be found in the [output folder](output). 
-1. Download the NIST truth set [VCF file](ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz) and [BED file](ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.bed) -2. Install dotnet if you don't already have it installed on your system (you can google directions on how to install based on your system). -3. Build witty.er with the `dotnet publish` command under the Wittyer project folder +1. Download the NIST truth set: + 1. VCF file: ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz + 2. BED file: ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/NIST_SVs_Integration_v0.6/HG002_SVs_Tier1_v0.6.bed +2. Install `dotnet` if you don't already have it installed on your system (you can google directions on how to install based on your system). +3. Build `witty.er` with the `dotnet publish` command under the `Wittyer` project folder 4. Run the following command line: ```bash diff --git a/docs/release-notes/README.md b/docs/release-notes/README.md index f26d7b7..014b03f 100644 --- a/docs/release-notes/README.md +++ b/docs/release-notes/README.md @@ -11,4 +11,4 @@ v0.3.0 | 02/15/2019 | [v0.3.0](v0.3.0.md) v0.3.1 | 04/02/2019 | [v0.3.1](v0.3.1.md) v0.3.2 | 06/04/2019 | [v0.3.2](v0.3.2.md) v0.3.3 | 01/31/2020 | [v0.3.3](v0.3.3.md) -v0.3.4 | TBA | [v0.3.4](v0.3.4.md) +v0.3.4 | 11/02/2020 | [v0.3.4](v0.3.4.md) diff --git a/docs/release-notes/v0.3.4.md b/docs/release-notes/v0.3.4.md index e69de29..850b2e7 100644 --- a/docs/release-notes/v0.3.4.md +++ b/docs/release-notes/v0.3.4.md @@ -0,0 +1,13 @@ +# Witty.er v0.3.4 release notes +[Release Notes Index](README.md) + +## Feature updates +- [WIT-147](https://jira.illumina.com/browse/WIT-147) Allow --include-bed to be used in conjunction with config-files since it's a path. 
+- [WIT-148](https://jira.illumina.com/browse/WIT-148) Include Bed file should do magic for UCSC and Ensembl names +- [WIT-151](https://jira.illumina.com/browse/WIT-151) Support Sniffles breakend point notation better +- Added examples for CNV and SV evaluation using dragen output files and the NIST HG002 truth data. + +## Bug fixes +- [WIT-149](https://jira.illumina.com/browse/WIT-149) include bed doesn't correctly handle bed end intervals +- [WIT-152](https://jira.illumina.com/browse/WIT-152) include bed file causes base stats to be incorrect +- [WIT-154](https://jira.illumina.com/browse/WIT-154) Base level stats don't match between truth and query when IncludeBed includes one but not the other