Skip to content

Commit

Permalink
Merge pull request #10 from Illumina/feature/WIT-154
Browse files Browse the repository at this point in the history
Fix issue with base stats when bed files include truth but not query …
  • Loading branch information
Kentalot committed Nov 2, 2020
2 parents 86a1b63 + a74cd41 commit 63acf60
Show file tree
Hide file tree
Showing 25 changed files with 298 additions and 59 deletions.
24 changes: 24 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Ilmn.Das.App.Wittyer.Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,30 @@
<None Update="Resources\WIT-144\truth.vcf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-149\bed.bed">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-149\query.vcf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-149\truth.vcf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\Tiny\bed.bed">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-154\config.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-154\bed.bed">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-154\query.vcf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Resources\WIT-154\truth.vcf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

<ItemGroup>
Expand Down
3 changes: 3 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Resources/Tiny/bed.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
chr1 40000000 80500000
chr2 40000000 80500000
chr2 85000000 95510000
3 changes: 2 additions & 1 deletion Ilmn.Das.App.Wittyer.Test/Resources/Tiny/query.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
chr1 50000001 . N <DEL> 10 PASS END=50064000;SVTYPE=DUP GT:CN 0/1:1
chr1 60000001 . N <DUP> 10 PASS END=60128000;SVTYPE=DUP GT:CN 1/1:4
chr1 70000001 . N <DUP> 10 PASS END=70250000;SVTYPE=DUP GT:CN 0/0:2
chr1 80000001 . N <DUP> 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3
chr2 80000001 . N <DUP> 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3
chr2 90000001 . N <DUP> 10 PASS END=90510000;SVTYPE=DUP GT:CN 0/1:3
3 changes: 2 additions & 1 deletion Ilmn.Das.App.Wittyer.Test/Resources/Tiny/truth.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
chr1 50000001 . N <DEL> 10 PASS END=50064000;SVTYPE=DUP GT:CN 0/1:1
chr1 60000001 . N <DUP> 10 PASS END=60128000;SVTYPE=DUP GT:CN 1/1:3
chr1 70000001 . N <DUP> 10 PASS END=70250000;SVTYPE=DUP GT:CN 0/0:2
chr1 80000001 . N <DUP> 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3
chr2 80000001 . N <DUP> 10 PASS END=80510000;SVTYPE=DUP GT:CN 0/1:3
chr2 90000001 . N <DUP> 10 PASS END=90510000;SVTYPE=DUP GT:CN 0/1:3
1 change: 1 addition & 0 deletions Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/bed.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr1 145600000 146000000
29 changes: 29 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[
{
"variantType": "Duplication",
"binSizes": "1000,5000,10000,20000,50000",
"bpDistance": 10000,
"percentDistance": 0.25,
"includedFilters": "PASS",
"excludedFilters": "",
"includeBed": ""
},
{
"variantType": "Deletion",
"binSizes": "1000,5000,10000,20000,50000",
"bpDistance": 10000,
"percentDistance": 0.25,
"includedFilters": "PASS",
"excludedFilters": "",
"includeBed": ""
},
{
"variantType": "CopyNumberReference",
"binSizes": "1000,5000,10000,20000,50000",
"bpDistance": 10000,
"percentDistance": 0.25,
"includedFilters": "PASS",
"excludedFilters": "",
"includeBed": ""
}
]
4 changes: 4 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/query.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT QUERY
chr1 145518989 . N <DEL> 150 PASS END=145809108;REFLEN=290119;SVLEN=-290119;SVTYPE=CNV GT:SM:CN:BC:PE 0/1:0.507438:1:110:28,0
4 changes: 4 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Resources/WIT-154/truth.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TRUTH
chr1 145602105 . N <DEL> 150 PASS END=145808783;REFLEN=206678;SVTYPE=DEL; GT:CN 0/1:1
10 changes: 6 additions & 4 deletions Ilmn.Das.App.Wittyer.Test/SkipBinsTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using Ilmn.Das.App.Wittyer.Vcf.Variants;
using Ilmn.Das.Core.Tries.Extensions;
using Ilmn.Das.Std.AppUtils.Misc;
using Ilmn.Das.Std.XunitUtils;
using Xunit;

namespace Ilmn.Das.App.Wittyer.Test
Expand Down Expand Up @@ -40,12 +41,13 @@ public void SkippedBinsAreIgnoredInStats()
var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First();
var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs);

Assert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
Assert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.FalseCount);
Assert.Equal(0.5, results.EventLevelRecallOverall.First(typeRecallTuple => typeRecallTuple.type == WittyerType.CopyNumberGain).recall);
MultiAssert.Equal(2U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
MultiAssert.Equal(1U, results.OverallStats[StatsType.Event].QueryStats.FalseCount);
MultiAssert.Equal(0.6666666666666666, results.EventLevelRecallOverall.First(typeRecallTuple => typeRecallTuple.type == WittyerType.CopyNumberGain).recall);

var numberOfBinsReportedOn = results.EventLevelRecallPerBin.First().perBinRecall.Count();
Assert.Equal(2, numberOfBinsReportedOn);
MultiAssert.Equal(2, numberOfBinsReportedOn);
MultiAssert.AssertAll();
}
}
}
35 changes: 33 additions & 2 deletions Ilmn.Das.App.Wittyer.Test/TinyTest.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
using System.Collections.Immutable;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using Ilmn.Das.App.Wittyer.Infrastructure;
using Ilmn.Das.App.Wittyer.Input;
using Ilmn.Das.App.Wittyer.Stats;
using Ilmn.Das.App.Wittyer.Vcf.Variants;
using Ilmn.Das.Core.Tries.Extensions;
using Ilmn.Das.Std.AppUtils.Misc;
using Ilmn.Das.Std.XunitUtils;
using Xunit;

namespace Ilmn.Das.App.Wittyer.Test
Expand All @@ -17,6 +20,9 @@ public class TinyTest

private static readonly FileInfo TinyQuery =
Path.Combine("Resources", "Tiny", "query.vcf").ToFileInfo();

private static readonly FileInfo TinyBed =
Path.Combine("Resources", "Tiny", "bed.bed").ToFileInfo();

[Fact]
public void CrossType_Works()
Expand All @@ -31,7 +37,32 @@ public void CrossType_Works()

var (_, query, truth) = MainLauncher.GenerateResults(wittyerSettings).EnumerateSuccesses().First();
var results = MainLauncher.GenerateSampleMetrics(truth, query, false, inputSpecs);
Assert.Equal(4U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
MultiAssert.Equal(5U, results.OverallStats[StatsType.Event].QueryStats.TrueCount);
MultiAssert.Equal(1461995U, results.OverallStats[StatsType.Base].QueryStats.TrueCount);
MultiAssert.Equal(1461995U, results.OverallStats[StatsType.Base].TruthStats.TrueCount);
MultiAssert.AssertAll();
}

/// <summary>
/// Evaluates the Tiny truth/query VCF pair in cross-type mode with the Tiny bed file
/// attached as the include bed of every default input spec, then checks the overall
/// event-level and base-level true counts.
/// </summary>
[Fact]
public void CrossType_Bases_Works()
{
    var outputDir = Path.GetRandomFileName().ToDirectoryInfo();

    // Rebuild each default spec with a 10000 bp distance and the Tiny include bed;
    // every other setting is carried over from the default spec unchanged.
    var specsByType =
        (from defaultSpec in InputSpec.GenerateDefaultInputSpecs(false)
            select InputSpec.Create(
                defaultSpec.VariantType,
                defaultSpec.BinSizes,
                10000,
                defaultSpec.PercentDistance,
                defaultSpec.ExcludedFilters,
                defaultSpec.IncludedFilters,
                IncludeBedFile.CreateFromBedFile(TinyBed)))
        .ToDictionary(spec => spec.VariantType, spec => spec);

    var settings = WittyerSettings.Create(
        outputDir,
        TinyTruth,
        TinyQuery,
        ImmutableList<ISamplePair>.Empty,
        EvaluationMode.CrossTypeAndSimpleCounting,
        specsByType);

    // Only the first successful result is evaluated.
    var firstSuccess = MainLauncher.GenerateResults(settings).EnumerateSuccesses().First();
    var (_, queryResult, truthResult) = firstSuccess;
    var metrics = MainLauncher.GenerateSampleMetrics(truthResult, queryResult, false, specsByType);

    var overallEventStats = metrics.OverallStats[StatsType.Event];
    MultiAssert.Equal(4U, overallEventStats.QueryStats.TrueCount);

    var overallBaseStats = metrics.OverallStats[StatsType.Base];
    MultiAssert.Equal(1451995U, overallBaseStats.QueryStats.TrueCount);
    MultiAssert.Equal(1451995U, overallBaseStats.TruthStats.TrueCount);

    MultiAssert.AssertAll();
}
}
}
46 changes: 46 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Wit149.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using Ilmn.Das.App.Wittyer.Infrastructure;
using Ilmn.Das.App.Wittyer.Input;
using Ilmn.Das.App.Wittyer.Stats;
using Ilmn.Das.Core.Tries.Extensions;
using Ilmn.Das.Std.AppUtils.Misc;
using Ilmn.Das.Std.XunitUtils;
using Xunit;

namespace Ilmn.Das.App.Wittyer.Test
{
/// <summary>
/// Test for the WIT-149 scenario: evaluates the WIT-149 truth/query VCF pair with the
/// WIT-149 bed file attached as the include bed of every default input spec.
/// </summary>
public class Wit149
{
    private static readonly FileInfo Truth = GetResource("truth.vcf");

    private static readonly FileInfo Query = GetResource("query.vcf");

    private static readonly FileInfo Bed = GetResource("bed.bed");

    /// <summary>Builds the path of a file inside the Resources/WIT-149 test data folder.</summary>
    /// <param name="fileName">Name of the file within that folder.</param>
    /// <returns>A <see cref="FileInfo"/> for the requested resource.</returns>
    private static FileInfo GetResource(string fileName)
        => Path.Combine("Resources", "WIT-149", fileName).ToFileInfo();

    /// <summary>
    /// Checks the overall base-level and event-level true counts, for both query and truth,
    /// when the bed file is used as the include bed.
    /// </summary>
    [Fact]
    public void WholeChrIncluded()
    {
        var outputDir = Path.GetRandomFileName().ToDirectoryInfo();

        // Rebuild each default spec with a 10000 bp distance and the WIT-149 include bed;
        // every other setting is carried over from the default spec unchanged.
        var specsByType =
            (from defaultSpec in InputSpec.GenerateDefaultInputSpecs(false)
                select InputSpec.Create(
                    defaultSpec.VariantType,
                    defaultSpec.BinSizes,
                    10000,
                    defaultSpec.PercentDistance,
                    defaultSpec.ExcludedFilters,
                    defaultSpec.IncludedFilters,
                    IncludeBedFile.CreateFromBedFile(Bed)))
            .ToDictionary(spec => spec.VariantType, spec => spec);

        var settings = WittyerSettings.Create(
            outputDir,
            Truth,
            Query,
            ImmutableList<ISamplePair>.Empty,
            EvaluationMode.CrossTypeAndSimpleCounting,
            specsByType);

        // Only the first successful result is evaluated.
        var firstSuccess = MainLauncher.GenerateResults(settings).EnumerateSuccesses().First();
        var (_, queryResult, truthResult) = firstSuccess;
        var metrics = MainLauncher.GenerateSampleMetrics(truthResult, queryResult, false, specsByType);

        // Expected base count: end of bed - (start of query + 1 for the padded base)
        // = 135086622 - 1 = 135086621.
        var overallBaseStats = metrics.OverallStats[StatsType.Base];
        MultiAssert.Equal(135086621U, overallBaseStats.QueryStats.TrueCount);
        MultiAssert.Equal(135086621U, overallBaseStats.TruthStats.TrueCount);

        var overallEventStats = metrics.OverallStats[StatsType.Event];
        MultiAssert.Equal(1U, overallEventStats.QueryStats.TrueCount);
        MultiAssert.Equal(1U, overallEventStats.TruthStats.TrueCount);

        MultiAssert.AssertAll();
    }
}
}
52 changes: 52 additions & 0 deletions Ilmn.Das.App.Wittyer.Test/Wit154.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using Ilmn.Das.App.Wittyer.Infrastructure;
using Ilmn.Das.App.Wittyer.Input;
using Ilmn.Das.App.Wittyer.Stats;
using Ilmn.Das.App.Wittyer.Vcf.Variants;
using Ilmn.Das.Core.Tries.Extensions;
using Ilmn.Das.Std.AppUtils.Misc;
using Ilmn.Das.Std.XunitUtils;
using Xunit;

namespace Ilmn.Das.App.Wittyer.Test
{
/// <summary>
/// Test for the WIT-154 scenario: evaluates the WIT-154 truth/query VCF pair using input
/// specs read from the WIT-154 config.json together with the WIT-154 bed file.
/// </summary>
public class Wit154
{
    private static readonly FileInfo Truth = GetResource("truth.vcf");

    private static readonly FileInfo Query = GetResource("query.vcf");

    private static readonly FileInfo Bed = GetResource("bed.bed");

    private static readonly FileInfo Config = GetResource("config.json");

    /// <summary>Builds the path of a file inside the Resources/WIT-154 test data folder.</summary>
    /// <param name="fileName">Name of the file within that folder.</param>
    /// <returns>A <see cref="FileInfo"/> for the requested resource.</returns>
    private static FileInfo GetResource(string fileName)
        => Path.Combine("Resources", "WIT-154", fileName).ToFileInfo();

    /// <summary>
    /// Checks that the Deletion base-level true count is the same expected value for both
    /// the query and the truth side.
    /// </summary>
    [Fact]
    public void CrossType_ComplexBed_Works()
    {
        var outputDir = Path.GetRandomFileName().ToDirectoryInfo();

        // Read the config text first, then create the include bed, then parse the specs.
        var configText = File.ReadAllText(Config.FullName);
        var includeBed = IncludeBedFile.CreateFromBedFile(Bed);
        var parsedSpecs = InputSpec.CreateSpecsFromString(configText, includeBed);

        // A null parse result falls back to an empty spec dictionary.
        var specsByType = parsedSpecs?.ToDictionary(spec => spec.VariantType, spec => spec)
                          ?? new Dictionary<WittyerType, InputSpec>();

        var settings = WittyerSettings.Create(
            outputDir,
            Truth,
            Query,
            ImmutableList<ISamplePair>.Empty,
            EvaluationMode.CrossTypeAndSimpleCounting,
            specsByType);

        // Only the first successful result is evaluated.
        var firstSuccess = MainLauncher.GenerateResults(settings).EnumerateSuccesses().First();
        var (_, queryResult, truthResult) = firstSuccess;
        var metrics = MainLauncher.GenerateSampleMetrics(truthResult, queryResult, false, specsByType);

        var deletionBaseStats = metrics.DetailedStats[WittyerType.Deletion].OverallStats[StatsType.Base];
        MultiAssert.Equal(206678U, deletionBaseStats.QueryStats.TrueCount);
        MultiAssert.Equal(206678U, deletionBaseStats.TruthStats.TrueCount);

        MultiAssert.AssertAll();
    }
}
}
31 changes: 9 additions & 22 deletions Ilmn.Das.App.Wittyer/Infrastructure/Quantify.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,7 @@ private static (IBasicStatsCount overallBaseStats,
var typeTotalTpTrees = perTypeTotalTpDictionary.GetOrAdd(type,
_ => new ConcurrentDictionary<IContigInfo, MergedIntervalTree<uint>>());

foreach (var binGroup in variants.Where(it => // must be Assessed
it.Sample.Wit != WitDecision.NotAssessed)
foreach (var binGroup in variants
.GroupBy(v => v.Win.Start))
{
var binStart = binGroup.Key;
Expand All @@ -168,10 +167,15 @@ private static (IBasicStatsCount overallBaseStats,
eventStats.AddTrueEvent();
else if (variant.Sample.Wit == falseDecision)
eventStats.AddFalseEvent();
else if (variant.Sample.Wit != WitDecision.NotAssessed)
else if (variant.Sample.Wit == WitDecision.NotAssessed)
{
// Purposely empty: NotAssessed variants add nothing to the event stats, but they must
// stay in this loop (rather than being filtered out beforehand) for the base stats.
}
else
throw new InvalidDataException(
$"Unexpected {nameof(WitDecision)} value ({variant.Sample.Wit}) for variant: " +
variant.OriginalVariant.ToShortString());
$"Unexpected {nameof(WitDecision)} value ({variant.Sample.Wit}) for variant: "
+ variant.OriginalVariant.ToShortString());

if (!type.HasBaseLevelStats) continue;

Expand Down Expand Up @@ -243,23 +247,6 @@ private static (IBasicStatsCount overallBaseStats,
stats.AddFalseCount(chr, interval);

}

////This is here as sanity check code, we can remove later if we want.

//if (stats == null) // means TotalTree has no intervals
// continue;

//// eventually get rid of this by replacing with actually keeping track of totals in the stats
//// and after outputting stats, we should do sanity check and crash if not equal.
//var fpTotal = stats.FalseCount.Select(kvp => kvp.Value.GetTotalMergedLength()).Sum();
//var tpTotal = stats.TrueCount.Select(kvp => kvp.Value.GetTotalMergedLength()).Sum();
//var expectedTotal = perBinTotalDictionary[binGroup.Key]
// .Select(kvp => kvp.Value.GetTotalMergedLength()).Sum();
//var actualTotal = fpTotal + tpTotal;

//if (actualTotal != expectedTotal)
// throw new InvalidDataException(
// $"Expected total bases to be {expectedTotal}, but got {actualTotal}!");
}
}

Expand Down
Loading

0 comments on commit 63acf60

Please sign in to comment.