From fc248dfc103e5e7130bbfcdd331be0a233136de5 Mon Sep 17 00:00:00 2001 From: Mark Walker Date: Tue, 3 Dec 2024 13:45:14 -0500 Subject: [PATCH] Prioritize het calls when merging clustered SVs (#9058) --- .../sv/cluster/CanonicalSVCollapser.java | 31 ++- .../cluster/CanonicalSVCollapserUnitTest.java | 185 ++++++++++++++---- .../walkers/sv/SVClusterIntegrationTest.java | 6 +- 3 files changed, 174 insertions(+), 48 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java index 8a1f68567a5..ee6e140b793 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java @@ -122,30 +122,35 @@ public enum FlagFieldLogic { /** * Comparators used for picking the representative genotype for a given sample */ + // Priotize non-ref over ref final Comparator genotypeIsNonRefComparator = (o1, o2) -> { final long count1 = Math.min(1, o1.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count()); final long count2 = Math.min(1, o2.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count()); return Long.compare(count1, count2); }; + // Priotize fewer ALT alleles over more. When applied after non-ref comparator, hom-ref genotypes will not be encountered. final Comparator genotypeNonRefCountComparator = (o1, o2) -> { final long count1 = o1.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count(); final long count2 = o2.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count(); - return Long.compare(count1, count2); + return Long.compare(count2, count1); }; + // Priotize called genotypes final Comparator genotypeCalledComparator = (o1, o2) -> { final long count1 = o1.getAlleles().stream().filter(Allele::isCalled).count(); final long count2 = o2.getAlleles().stream().filter(Allele::isCalled).count(); return Long.compare(count1, count2); }; + // Priotize higher quality final Comparator genotypeQualityComparator = (o1, o2) -> { final int quality1 = VariantContextGetters.getAttributeAsInt(o1, VCFConstants.GENOTYPE_QUALITY_KEY, 0); final int quality2 = VariantContextGetters.getAttributeAsInt(o2, VCFConstants.GENOTYPE_QUALITY_KEY, 0); return Integer.compare(quality1, quality2); }; + // Priotize higher depth genotyping quality final Comparator genotypeCopyNumberQualityComparator = new Comparator() { @Override public int compare(Genotype o1, Genotype o2) { @@ -155,6 +160,7 @@ public int compare(Genotype o1, Genotype o2) { } }; + // Priotize depth genotypes closer to reference final Comparator genotypeCopyNumberComparator = new Comparator() { @Override public int compare(Genotype o1, Genotype o2) { @@ -162,7 +168,25 @@ public int compare(Genotype o1, Genotype o2) { final int copyNumber1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.COPY_NUMBER_FORMAT, 0); final int expectedQualityNumber2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0); final int copyNumber2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.COPY_NUMBER_FORMAT, 0); - return Double.compare(Math.abs(expectedQualityNumber1 - copyNumber1), Math.abs(expectedQualityNumber2 - copyNumber2)); + return Double.compare(Math.abs(expectedQualityNumber2 - copyNumber2), Math.abs(expectedQualityNumber1 - copyNumber1)); + } + }; + + // Priotize DEL over DUP as final tiebreaker + final Comparator genotypeDelOverDupComparator = new Comparator() { + @Override + public int compare(Genotype o1, Genotype o2) { + final int expectedCN1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0); + final boolean isDel1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.COPY_NUMBER_FORMAT, expectedCN1) < expectedCN1; + final int expectedCN2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0); + final boolean isDel2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.COPY_NUMBER_FORMAT, expectedCN2) < expectedCN2; + if (isDel1 && !isDel2) { + return 1; + } else if (isDel2 && !isDel1) { + return -1; + } else { + return 0; + } } }; @@ -461,7 +485,8 @@ protected Genotype getRepresentativeGenotype(final Collection genotype .thenComparing(genotypeQualityComparator) .thenComparing(genotypeNonRefCountComparator) .thenComparing(genotypeCopyNumberQualityComparator) - .thenComparing(genotypeCopyNumberComparator)).get(); + .thenComparing(genotypeCopyNumberComparator) + .thenComparing(genotypeDelOverDupComparator)).get(); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java index 3a692ab4001..0b83beadb0b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java @@ -646,23 +646,23 @@ public Object[][] collapseSampleGenotypesTestData() { }, // het preferred over hom ref even with lower gq { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.REF_N, Allele.REF_N), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithGQ(2, 30), + createGenotypeTestAttributesWithGQ(2, 20) + ), + Allele.REF_N, + GenotypeBuilder.create( "sample", - Lists.newArrayList( - Lists.newArrayList(Allele.REF_N, Allele.REF_N), - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS) - ), - Lists.newArrayList( - createGenotypeTestAttributesWithGQ(2, 30), - createGenotypeTestAttributesWithGQ(2, 20) - ), - Allele.REF_N, - GenotypeBuilder.create( - "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), - createGenotypeTestAttributesWithGQ(2, 20) - ) - }, - // hom var preferred over het + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), + createGenotypeTestAttributesWithGQ(2, 20) + ) + }, + // het preferred over hom-var { "sample", Lists.newArrayList( @@ -676,11 +676,11 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(2) ) }, - // hom-var over het if GQ equal + // het over hom-var if GQ equal { "sample", Lists.newArrayList( @@ -694,11 +694,11 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(2) ) }, - // het over hom-var if GQ is higher + // hom-var over het if GQ is higher { "sample", Lists.newArrayList( @@ -706,17 +706,17 @@ public Object[][] collapseSampleGenotypesTestData() { Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS) ), Lists.newArrayList( - createGenotypeTestAttributesWithGQ(2, 30), - createGenotypeTestAttributesWithGQ(2, 40) + createGenotypeTestAttributesWithGQ(2, 40), + createGenotypeTestAttributesWithGQ(2, 30) ), Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), createGenotypeTestAttributesWithGQ(2, 40) ) }, - // hom var preferred over hom-ref too + // het preferred over hom-ref too { "sample", Lists.newArrayList( @@ -732,7 +732,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributesWithGQ(2, 30) ) }, @@ -791,7 +791,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(3) ) }, @@ -813,7 +813,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(3) ) }, @@ -841,7 +841,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(3) ) }, @@ -1083,7 +1083,7 @@ public Object[][] collapseSampleGenotypesTestData() { createGenotypeTestAttributes(1, 1) ) }, - // Multi-allelic CNV, rare case with equal CNQ take CN!=ECN + // Multi-allelic CNV, with no CNQ use copy state closest to ref { "sample", Lists.newArrayList( @@ -1091,35 +1091,34 @@ public Object[][] collapseSampleGenotypesTestData() { Collections.singletonList(Allele.NO_CALL) ), Lists.newArrayList( - createGenotypeTestAttributesWithCNQ(1, 1, 30), - createGenotypeTestAttributesWithCNQ(1, 0, 30) + createGenotypeTestAttributes(1, 1), + createGenotypeTestAttributes(1, 2) ), Allele.REF_N, GenotypeBuilder.create( "sample", Lists.newArrayList(Allele.NO_CALL), - createGenotypeTestAttributesWithCNQ(1, 0, 30) + createGenotypeTestAttributes(1, 1) ) }, - // Multi-allelic CNV, haploid dup + // Multi-allelic CNV, when CNQ equal use copy state closest to ref { "sample", Lists.newArrayList( - Collections.singletonList(Allele.NO_CALL), - Collections.singletonList(Allele.NO_CALL) + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) ), Lists.newArrayList( - createGenotypeTestAttributes(1, 1), - createGenotypeTestAttributes(1, 2) + createGenotypeTestAttributesWithCNQ(1, 1, 30), + createGenotypeTestAttributesWithCNQ(1, 2, 30) ), Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.NO_CALL), - createGenotypeTestAttributes(1, 2) + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(1, 1, 30) ) }, - // Multi-allelic CNV, when CNQ equal use CN!=ECN { "sample", Lists.newArrayList( @@ -1128,16 +1127,32 @@ public Object[][] collapseSampleGenotypesTestData() { ), Lists.newArrayList( createGenotypeTestAttributesWithCNQ(2, 2, 30), - createGenotypeTestAttributesWithCNQ(2, 1, 30) + createGenotypeTestAttributesWithCNQ(2, 3, 30) ), Allele.REF_N, GenotypeBuilder.create( "sample", Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 2, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 2, 30), createGenotypeTestAttributesWithCNQ(2, 1, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 2, 30) ) }, - // Multi-allelic CNV, when CNQ equal use CN!=ECN { "sample", Lists.newArrayList( @@ -1152,7 +1167,41 @@ public Object[][] collapseSampleGenotypesTestData() { GenotypeBuilder.create( "sample", Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 2, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 1, 30), createGenotypeTestAttributesWithCNQ(2, 0, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 1, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 5, 30), + createGenotypeTestAttributesWithCNQ(2, 6, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 5, 30) ) }, // Multi-allelic CNV, conflicting del and dup genotypes determined by CNQ @@ -1190,6 +1239,58 @@ public Object[][] collapseSampleGenotypesTestData() { createGenotypeTestAttributesWithCNQ(1, 2, 50) ) }, + // DEL prioritized over DUP tiebreaker when same distance from expected copy state + { + "sample", + Lists.newArrayList( + Collections.singletonList(Allele.NO_CALL), + Collections.singletonList(Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(1, 2, 30), + createGenotypeTestAttributesWithCNQ(1, 0, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(1, 0, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Collections.singletonList(Allele.NO_CALL), + Collections.singletonList(Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 0, 30), + createGenotypeTestAttributesWithCNQ(2, 4, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 0, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Collections.singletonList(Allele.NO_CALL), + Collections.singletonList(Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 1, 30), + createGenotypeTestAttributesWithCNQ(2, 3, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 1, 30) + ) + }, }; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java index ec3e6e15f96..75016e8744b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java @@ -460,11 +460,11 @@ public void testClusterSampleOverlap() { final int nonRefGenotypeCount = (int) variant.getGenotypes().stream().filter(g -> SVCallRecordUtils.isAltGenotype(g)).count(); Assert.assertEquals(nonRefGenotypeCount, 71); final int alleleCount = (int) variant.getGenotypes().stream().flatMap(g -> g.getAlleles().stream()).filter(SVCallRecordUtils::isAltAllele).count(); - Assert.assertEquals(alleleCount, 94); + Assert.assertEquals(alleleCount, 87); final Genotype g = variant.getGenotype("HG00129"); - Assert.assertTrue(g.isHomVar()); + Assert.assertTrue(g.isHet()); Assert.assertEquals(VariantContextGetters.getAttributeAsInt(g, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, -1), 2); - Assert.assertEquals(VariantContextGetters.getAttributeAsInt(g, GATKSVVCFConstants.COPY_NUMBER_FORMAT, -1), 0); + Assert.assertEquals(VariantContextGetters.getAttributeAsInt(g, GATKSVVCFConstants.COPY_NUMBER_FORMAT, -1), 1); } } Assert.assertEquals(expectedRecordsFound, 1);