Skip to content

Commit

Permalink
partial haplotype code format change
Browse files Browse the repository at this point in the history
  • Loading branch information
Schaudge committed Sep 8, 2024
1 parent d634457 commit ee7c067
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -838,8 +838,7 @@ static Map<VariantContext, Triple<Integer, Integer, PhaseGroup>> constructPhaseS
// genotype for the variant is homozygous since this method does not consider the ref haplotype
final boolean compIsOnAllAltHaps = haplotypesWithComp.size() == totalAvailableHaplotypes;
final Sets.SetView<Haplotype> intersectionHaplotype = Sets.intersection(haplotypesWithCall, haplotypesWithComp);
final OptionalInt maxPhaseConfidenceReads = intersectionHaplotype.stream().mapToInt(Haplotype::getWeakness).max();
final int phaseReadsCount = maxPhaseConfidenceReads.isPresent() ? maxPhaseConfidenceReads.getAsInt() : 0;
final int phaseReadsCount = intersectionHaplotype.stream().mapToInt(Haplotype::getWeakness).max().orElse(0);

// For some high frequency complex long indels, that many reads not coverage the whole indels completely,
// and variants will dispatch to different haplotypes, so we relax the conditions for phase set combination!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,9 @@ public void addGivenAlleles(final List<Event> givenAlleles, final int maxMnpDist

// choose the highest-scoring haplotypes along with the reference for building force-calling haplotypes
final List<Haplotype> baseHaplotypes = getHaplotypeList().stream()
.sorted(Comparator.comparing(Haplotype::isReference).thenComparingDouble(hap -> hap.getScore()).reversed())
.sorted(Comparator.comparing(Haplotype::isReference).thenComparingDouble(Haplotype::getScore).reversed())
.limit(AssemblyBasedCallerUtils.NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO)
.collect(Collectors.toList());
.toList();

for (final Event given : givenAlleles) {
final List<Event> assembledEvents = assembledEventsByStart.getOrDefault(given.getStart(), Collections.emptyList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ public List<VariantContext> callRegion(final AssemblyRegion originalAssemblyRegi
final Set<Event> goodPileupEvents = goodAndBadPileupEvents.getLeft();
final Set<Event> badPileupEvents = goodAndBadPileupEvents.getRight();

goodPileupEvents.forEach(allVariationEvents::add);
givenAlleles.forEach(allVariationEvents::add);
allVariationEvents.addAll(goodPileupEvents);
allVariationEvents.addAll(givenAlleles);

final AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(originalAssemblyRegion, allVariationEvents, referenceContext);
if (!trimmingResult.isVariationPresent()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.primitives.Doubles;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Locatable;
Expand Down Expand Up @@ -34,6 +33,7 @@
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.util.*;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
Expand Down Expand Up @@ -102,12 +102,12 @@ public CalledHaplotypes callMutations(

final List<Integer> eventStarts = EventMap.getEventStartPositions(haplotypes).stream()
.filter(loc -> activeRegionWindow.getStart() <= loc && loc <= activeRegionWindow.getEnd())
.collect(Collectors.toList());
.toList();

final Set<Haplotype> calledHaplotypes = new HashSet<>();
final List<VariantContext> returnCalls = new ArrayList<>();

if(withBamOut){
if (withBamOut) {
//add annotations to reads for alignment regions and calling regions
AssemblyBasedCallerUtils.annotateReadLikelihoodsWithRegions(logReadLikelihoods, activeRegionWindow);
}
Expand All @@ -118,7 +118,7 @@ public CalledHaplotypes callMutations(
final AlleleLikelihoods<Fragment, Haplotype> logFragmentLikelihoods = logReadLikelihoods.groupEvidence(MTAC.independentMates ? read -> read : GATKRead::getName, Fragment::createAndAvoidFailure);

final Set<Event> potentialSomaticEventsInRegion = new HashSet<>();
for( final int loc : eventStarts ) {
for (final int loc : eventStarts ) {
final List<VariantContext> eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantsFromActiveHaplotypes(loc, haplotypes, false);
VariantContext merged = AssemblyBasedCallerUtils.makeMergedVariantContext(eventsAtThisLoc);
if( merged == null ) {
Expand Down Expand Up @@ -153,20 +153,21 @@ public CalledHaplotypes callMutations(


final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.allelesConsistentWithGivenAlleles(givenAlleles, mergedVC);
double maxAlternativeLogOdd = mergedVC.getAlternateAlleles().stream().mapToDouble(tumorLogOdds::get).max().orElse(0);
final List<Allele> tumorAltAlleles = mergedVC.getAlternateAlleles().size() > 1 ? mergedVC.getAlternateAlleles().stream()
.filter(allele -> tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds() || (forcedAlleles.contains(allele) && tumorLogOdds.getAlt(allele) * 11 > maxAlternativeLogOdd))
.collect(Collectors.toList()) : mergedVC.getAlternateAlleles().stream()
.filter(allele -> tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds() || forcedAlleles.contains(allele))
.collect(Collectors.toList());
final List<Allele> allAltAlleles = mergedVC.getAlternateAlleles();
final double maxAlternativeLogOdd = allAltAlleles.stream().mapToDouble(tumorLogOdds::get).max().orElse(0.0001);
Function<Allele, Boolean> alleleFilters = (allele) -> forcedAlleles.contains(allele) || tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds();
if (allAltAlleles.size() > 1)
alleleFilters = (allele) -> (forcedAlleles.contains(allele) && tumorLogOdds.getAlt(allele) * 11 > maxAlternativeLogOdd)
|| tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds();
final List<Allele> tumorAltAlleles = allAltAlleles.stream().filter(alleleFilters::apply).toList();

final List<Allele> allelesToGenotype = tumorAltAlleles.stream()
.filter(allele -> forcedAlleles.contains(allele) || !hasNormal || MTAC.genotypeGermlineSites || normalLogOdds.getAlt(allele) > MathUtils.log10ToLog(MTAC.normalLog10Odds))
.toList();

// record somatic alleles for later use in the Event Count annotation
// note that in tumor-only calling it does not attempt to detect germline events
mergedVC.getAlternateAlleles().stream()
allAltAlleles.stream()
.filter(allele -> tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds())
.filter(allele -> !hasNormal || normalLogOdds.getAlt(allele) > MathUtils.log10ToLog(MTAC.normalLog10Odds))
.map(allele -> new Event(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getReference(), allele))
Expand Down Expand Up @@ -220,10 +221,9 @@ public CalledHaplotypes callMutations(

final AlleleLikelihoods<GATKRead, Allele> trimmedLikelihoodsForAnnotation = logReadAlleleLikelihoods.marginalize(trimmedToUntrimmedAlleleMap);


final VariantContext annotatedCall = annotationEngine.annotateContext(trimmedCall, featureContext, referenceContext,
trimmedLikelihoodsForAnnotation, Optional.of(trimmedLikelihoods), Optional.of(logFragmentLikelihoods), Optional.empty(), a -> true);
if(withBamOut) {
if (withBamOut) {
AssemblyBasedCallerUtils.annotateReadLikelihoodsWithSupportedAlleles(trimmedCall, trimmedLikelihoods, Fragment::getReads);
}

Expand All @@ -237,15 +237,14 @@ public CalledHaplotypes callMutations(
}

final List<VariantContext> outputCalls = AssemblyBasedCallerUtils.phaseCalls(returnCalls, calledHaplotypes);
final int eventCount = outputCalls.size();

// calculate the number of somatic events in the best haplotype of each called variant
final Map<Haplotype, MutableInt> haplotypeSupportCounts = logReadLikelihoods.alleles().stream()
.collect(Collectors.toMap(hap -> hap, label -> new MutableInt(0)));
logReadLikelihoods.bestAllelesBreakingTies()
.forEach(bestHaplotype -> haplotypeSupportCounts.get(bestHaplotype.allele).increment());

final Map<Event, List<Haplotype>> haplotypesByEvent= new HashMap<>();
final Map<Event, List<Haplotype>> haplotypesByEvent = new HashMap<>();
for (final Haplotype haplotype : logReadLikelihoods.alleles()) {
for (final Event event : haplotype.getEventMap().getEvents()) {
haplotypesByEvent.computeIfAbsent(event, e -> new ArrayList<>()).add(haplotype);
Expand Down

0 comments on commit ee7c067

Please sign in to comment.