Skip to content

Commit

Permalink
Adds QD and AS_QD emission from VariantAnnotator on GVS input (broadi…
Browse files Browse the repository at this point in the history
…nstitute#8978)

* Updating VariantAnnotator to emit QD and AS_QD on GVS input

* clean up

* addressing comments
  • Loading branch information
meganshand authored Sep 12, 2024
1 parent 6bb5217 commit 2ba3a15
Show file tree
Hide file tree
Showing 5 changed files with 3,450 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
Expand Down Expand Up @@ -57,7 +58,7 @@ public Map<String, Object> annotate(final ReferenceContext ref,
final VariantContext vc,
final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
Utils.nonNull(vc);
if ( !vc.hasLog10PError() ) {
if ( !(vc.hasLog10PError() || vc.hasAttribute(GATKVCFConstants.RAW_QUAL_APPROX_KEY)) ) {
return Collections.emptyMap();
}

Expand All @@ -72,7 +73,16 @@ public Map<String, Object> annotate(final ReferenceContext ref,
return Collections.emptyMap();
}

final double qual = -10.0 * vc.getLog10PError();
final double qual;
if (vc.hasLog10PError()) {
qual = -10.0 * vc.getLog10PError();
} else {
try {
qual = vc.getAttributeAsInt(GATKVCFConstants.RAW_QUAL_APPROX_KEY, 0);
} catch (NumberFormatException e) {
throw new GATKException("Error at: " + vc.getContig() + ":" + vc.getStart() + " when parsing " + GATKVCFConstants.RAW_QUAL_APPROX_KEY + ": " + e.getMessage());
}
}
double QD = qual / depth;

// Hack: see note in the fixTooHighQD method below
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCompoundHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.lang3.StringUtils;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
Expand Down Expand Up @@ -78,7 +76,9 @@ public List<VCFCompoundHeaderLine> getRawDescriptions() {
public Map<String, Object> annotate(final ReferenceContext ref,
final VariantContext vc,
final AlleleLikelihoods<GATKRead, Allele> likelihoods ) {
return Collections.emptyMap();
// first vc is used for the annotation and the second vc here is used just to get the alleles, so in this case we can pass the same vc for both
Map<String, Object> annotation = finalizeRawData(vc, vc);
return (annotation == null ? Collections.emptyMap() : Collections.singletonMap(getKeyNames().get(0), annotation.get(getKeyNames().get(0))));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -364,6 +361,46 @@ public void testDeNovo() {
Assert.assertEquals(lociWithHighConfidenceDeNovo, new int[] {10130767, 10197999});
}

//GVS vcfs have QUALapprox, but no QUAL. This tests that we can still calculate QD and AS_QD without QUAL (using QUALapprox and AS_QUALapprox)
@Test
public void addQdToGvsVcf() {
final File inputVCF = getTestFile("gvsCallsetWithNoQual.vcf");
final File outputVCF = createTempFile("output", ".vcf");
final ArgumentsBuilder args = new ArgumentsBuilder()
.addVCF(inputVCF)
.addOutput(outputVCF)
.addInterval("chr1:66506-66507")
.add("A", "QualByDepth")
.add("A", "AS_QualByDepth");
runCommandLine(args.getArgsList());

Map<Integer, Map<String, String>> expectedAnnotations = new HashMap<>();
Map<String, String> expectedAnnotations66506 = new HashMap<>();
Map<String, String> expectedAnnotations66507 = new HashMap<>();

// bi-allelic site
expectedAnnotations66506.put("QD", "4.23");
expectedAnnotations66506.put("AS_QD", "4.23");
//make sure that the AS_QUALapprox doesn't change from the input
expectedAnnotations66506.put("AS_QUALapprox", "0|110");

//multi-allelic site
expectedAnnotations66507.put("QD", "6.29");
expectedAnnotations66507.put("AS_QD", "[9.00, 9.26]");
expectedAnnotations66507.put("AS_QUALapprox", "0|396|537");

expectedAnnotations.put(66506, expectedAnnotations66506);
expectedAnnotations.put(66507, expectedAnnotations66507);


final List<VariantContext> outputVCs = VariantContextTestUtils.getVariantContexts(outputVCF);
for (final VariantContext outputVC : outputVCs) {
Assert.assertEquals(outputVC.getAttribute("QD"), expectedAnnotations.get(outputVC.getStart()).get("QD"));
Assert.assertEquals(outputVC.getAttribute("AS_QD").toString(), expectedAnnotations.get(outputVC.getStart()).get("AS_QD"));
Assert.assertEquals(outputVC.getAttribute("AS_QUALapprox"), expectedAnnotations.get(outputVC.getStart()).get("AS_QUALapprox"));
}
}

private static String keyForVariant( final VariantContext variant ) {
return String.format("%s:%d-%d %s", variant.getContig(), variant.getStart(), variant.getEnd(), variant.getAlleles());
}
Expand Down
Loading

0 comments on commit 2ba3a15

Please sign in to comment.