From 6bb52176a8e32ab74ec4545592388b3dece91b34 Mon Sep 17 00:00:00 2001 From: Mark Walker Date: Wed, 11 Sep 2024 12:31:25 -0400 Subject: [PATCH] Require both overlap and breakend proximity for depth-only SV clustering (#8962) --- .../tools/sv/cluster/CanonicalSVLinkage.java | 2 +- .../tools/sv/cluster/ClusteringParameters.java | 3 ++- .../hellbender/tools/sv/SVTestUtils.java | 2 +- .../tools/sv/cluster/SVClusterEngineTest.java | 2 +- ...ntGermlineCNVSegmentationIntegrationTest.java | 2 +- .../walkers/sv/SVClusterIntegrationTest.java | 16 ++++++++-------- .../walkers/sv/SVConcordanceIntegrationTest.java | 12 ++++++------ 7 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java index 854f4120b51..8d5b381ee81 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java @@ -45,7 +45,7 @@ public class CanonicalSVLinkage extends SVClusterLinkage public static final double DEFAULT_RECIPROCAL_OVERLAP_DEPTH_ONLY = 0.8; public static final double DEFAULT_SIZE_SIMILARITY_DEPTH_ONLY = 0; - public static final int DEFAULT_WINDOW_DEPTH_ONLY = 0; + public static final int DEFAULT_WINDOW_DEPTH_ONLY = 10000000; public static final double DEFAULT_SAMPLE_OVERLAP_DEPTH_ONLY = 0; public static final double DEFAULT_RECIPROCAL_OVERLAP_MIXED = 0.8; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java index 2365c5041e8..fe665c96c72 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java @@ -17,6 +17,7 @@ public class ClusteringParameters { // if true, both reciprocal overlap and window criteria must be met // if false, reciprocal overlap and/or window criteria must be met + // NOTE this is currently set to true in all tools but is being kept for possible future use private final boolean requiresOverlapAndProximity; // returns true if two given records are the correct type of pair for this parameter set @@ -57,7 +58,7 @@ public boolean isValidPair(final SVCallRecord a, final SVCallRecord b) { } public static ClusteringParameters createDepthParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) { - return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, false, (a,b) -> a.isDepthOnly() && b.isDepthOnly()); + return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, true, (a,b) -> a.isDepthOnly() && b.isDepthOnly()); } public static ClusteringParameters createMixedParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java index 35959b472d2..5a8e97a05fd 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java @@ -56,7 +56,7 @@ public static SVClusterEngine getNewDefaultMaxCliqueEngine() { return new SVClusterEngine(SVClusterEngine.CLUSTERING_TYPE.MAX_CLIQUE, defaultCollapser::collapse, getNewDefaultLinkage(), hg38Dict); } - public static final ClusteringParameters defaultDepthOnlyParameters = ClusteringParameters.createDepthParameters(0.8, 0, 0, 0); + public static final ClusteringParameters defaultDepthOnlyParameters = ClusteringParameters.createDepthParameters(0.8, 0, 10000000, 0); public static final ClusteringParameters defaultMixedParameters = ClusteringParameters.createMixedParameters(0.8, 0, 1000, 0); public static final ClusteringParameters defaultEvidenceParameters = ClusteringParameters.createPesrParameters(0.5, 0, 500, 0); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java index 38849f533bb..3b51938e997 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java @@ -26,7 +26,7 @@ public class SVClusterEngineTest { private final SVClusterEngine engine = SVTestUtils.defaultSingleLinkageEngine; - private static final ClusteringParameters depthOnlyParametersSizeSimilarity = ClusteringParameters.createDepthParameters(0.1, 0.5, 0, 0); + private static final ClusteringParameters depthOnlyParametersSizeSimilarity = ClusteringParameters.createDepthParameters(0.1, 0.5, 10000000, 0); private static final ClusteringParameters mixedParametersSizeSimilarity = ClusteringParameters.createMixedParameters(0.1, 0.5, 5000, 0); private static final ClusteringParameters evidenceParametersSizeSimilarity = ClusteringParameters.createPesrParameters(0.1, 0.5, 5000, 0); private final CanonicalSVLinkage linkageSizeSimilarity = new CanonicalSVLinkage<>(SVTestUtils.hg38Dict, false); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentationIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentationIntegrationTest.java index 2172881795f..8f27d2a6389 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentationIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentationIntegrationTest.java @@ -204,7 +204,7 @@ public void testOverlappingEvents(final List inputVcfs) { .add(JointGermlineCNVSegmentation.MODEL_CALL_INTERVALS_LONG_NAME, getToolTestDataDir() + "intervals.chr22.interval_list") .addInterval("22:22,538,114-23,538,437") .add(JointGermlineCNVSegmentation.CLUSTERING_INTERVAL_OVERLAP_LONG_NAME, 0.8) - .add(JointGermlineCNVSegmentation.CLUSTERING_BREAKEND_WINDOW_LONG_NAME, 0); + .add(JointGermlineCNVSegmentation.CLUSTERING_BREAKEND_WINDOW_LONG_NAME, 10000000); inputVcfs.forEach(vcf -> args.addVCF(vcf)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java index d79f8b49588..caff0aa9675 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java @@ -113,7 +113,7 @@ public void testMergeHelper(final boolean omitMembers) { .add(SVCluster.ALGORITHM_LONG_NAME, SVCluster.CLUSTER_ALGORITHM.SINGLE_LINKAGE) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 1) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 0) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 0) @@ -188,7 +188,7 @@ public void testClusterSingleLinkage() { .add(StandardArgumentDefinitions.REFERENCE_LONG_NAME, REFERENCE_PATH) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) @@ -204,7 +204,7 @@ public void testClusterSingleLinkage() { Assert.assertEquals(header.getSampleNamesInOrder(), Arrays.asList("HG00096", "HG00129", "HG00140", "NA18945", "NA18956")); - Assert.assertEquals(records.size(), 1338); + Assert.assertEquals(records.size(), 1344); // Check for one record int expectedRecordsFound = 0; @@ -255,7 +255,7 @@ public void testAgainstSimpleImplementation() { .add(StandardArgumentDefinitions.REFERENCE_LONG_NAME, REFERENCE_PATH) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) @@ -279,7 +279,7 @@ public void testAgainstSimpleImplementation() { final Pair> testVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final ReferenceSequenceFile referenceSequenceFile = ReferenceUtils.createReferenceReader(new GATKPath(REFERENCE_PATH)); - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0); final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVClusterEngine engine = SVClusterEngineFactory.createCanonical( @@ -347,7 +347,7 @@ public void testClusterMaxClique(final boolean fastMode) { .add(SVCluster.ALGORITHM_LONG_NAME, SVCluster.CLUSTER_ALGORITHM.MAX_CLIQUE) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) @@ -414,7 +414,7 @@ public void testClusterSampleOverlap() { .add(SVCluster.ALGORITHM_LONG_NAME, SVCluster.CLUSTER_ALGORITHM.SINGLE_LINKAGE) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0.5) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0.5) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) @@ -478,7 +478,7 @@ public void testAllosome() { .add(StandardArgumentDefinitions.REFERENCE_LONG_NAME, REFERENCE_PATH) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java index be35bdfdeb7..f9856585266 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java @@ -53,7 +53,7 @@ public void testRefPanel() { .add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) @@ -69,7 +69,7 @@ public void testRefPanel() { final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict; - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0); final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary); @@ -180,7 +180,7 @@ public void testSelf() { .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) .add(SVClusterEngineArgumentsCollection.DEPTH_SIZE_SIMILARITY_NAME, 0) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_SIZE_SIMILARITY_NAME, 0) @@ -197,7 +197,7 @@ public void testSelf() { final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict; - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0); final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary); @@ -279,7 +279,7 @@ private void assertPerfectConcordance(final File output, final String evalVcfPat final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict; - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0); final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary); @@ -331,7 +331,7 @@ public void testSitesOnly() { .add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) - .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) + .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)