package org.broadinstitute.gatk.tools.walkers.haplotypecaller;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.variant.variantcontext.VariantContext;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.AssemblyResult;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.BaseEdge;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.BaseGraph;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.BaseVertex;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.KBestHaplotype;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.KBestHaplotypeFinder;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.SeqGraph;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.SeqVertex;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.CigarUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;

/* loaded from: input_file:org/broadinstitute/gatk/tools/walkers/haplotypecaller/LocalAssemblyEngine.class */
/**
 * Abstract base class for local assembly engines.
 *
 * <p>Subclasses provide {@link #assemble(List, Haplotype, List)}, which turns reads into
 * {@link AssemblyResult}s. {@link #runLocalAssembly} validates its inputs, optionally
 * error-corrects the reads, delegates to {@code assemble}, sanity-checks every graph that
 * reports {@code ASSEMBLED_SOME_VARIATION}, and then extracts up to
 * {@link #numBestHaplotypesPerGraph} haplotypes from each such graph.
 */
public abstract class LocalAssemblyEngine {
    private static final Logger logger = Logger.getLogger(LocalAssemblyEngine.class);

    /** Not referenced by any code visible in this class; retained unchanged. */
    private static final boolean PRINT_FULL_GRAPH_FOR_DEBUGGING = true;

    /** Default value of {@link #minBaseQualityToUseInAssembly}. */
    public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = 10;

    /** Candidate haplotypes whose cigar spans fewer reference bases than this are dropped. */
    private static final int MIN_HAPLOTYPE_REFERENCE_LENGTH = 30;

    /**
     * When {@link #debugGraphTransformations} is on, graphs whose kmer size is at or above this
     * value are skipped by {@link #printGraphs(List)}.
     */
    private static final int KMER_SIZE_PRINT_CUTOFF_WHEN_DEBUGGING = 50;

    /** Maximum number of haplotypes requested from each graph's best-path finder. */
    protected final int numBestHaplotypesPerGraph;

    protected boolean debug = false;
    protected boolean allowCyclesInKmerGraphToGeneratePaths = false;
    protected boolean debugGraphTransformations = false;
    protected boolean recoverDanglingTails = true;
    protected boolean recoverDanglingHeads = true;
    protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;
    protected int pruneFactor = 2;
    protected boolean errorCorrectKmers = false;

    /** Destination for {@link #printGraphs(List)}; graphs are only printed when non-null. */
    private PrintStream graphWriter = null;

    /**
     * Creates an engine that extracts at most {@code i} haplotypes per assembly graph.
     *
     * @param i the number of best haplotypes to request per graph; must be at least 1
     * @throws IllegalArgumentException if {@code i < 1}
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public LocalAssemblyEngine(int i) {
        if (i < 1) {
            throw new IllegalArgumentException("numBestHaplotypesPerGraph should be >= 1 but got " + i);
        }
        this.numBestHaplotypesPerGraph = i;
    }

    /**
     * Assembles the given reads into zero or more assembly results.
     *
     * @param list      the reads to assemble (already error-corrected when a corrector was supplied
     *                  to {@link #runLocalAssembly})
     * @param haplotype the reference haplotype
     * @param list2     the haplotypes composed from the given alleles by
     *                  {@code GenotypingGivenAllelesUtils.composeGivenHaplotypes}
     * @return the assembly results; only those with status {@code ASSEMBLED_SOME_VARIATION} are
     *         used by {@link #runLocalAssembly}
     */
    protected abstract List<AssemblyResult> assemble(List<GATKSAMRecord> list, Haplotype haplotype, List<Haplotype> list2);

    /**
     * Runs the full local assembly for one active region.
     *
     * <p>Side effect: the genome location of {@code haplotype} is set to the region's extended
     * location, and {@code haplotype} is added to the returned result set.
     *
     * @param activeRegion       the region to assemble; must be non-null with a non-null extended location
     * @param haplotype          the reference haplotype; must be non-null
     * @param bArr               the reference bases stored on the result set as the full reference
     *                           with padding; must be the same length as {@code genomeLoc}
     * @param genomeLoc          the location stored on the result set as the padded reference location
     * @param list               variant contexts handed to
     *                           {@code GenotypingGivenAllelesUtils.composeGivenHaplotypes}
     * @param readErrorCorrector optional corrector; when non-null the region's reads are fed to it
     *                           and the corrected reads are assembled instead of the raw ones
     * @return the populated result set
     * @throws IllegalArgumentException if any precondition above is violated or the prune factor
     *                                  is negative
     */
    public AssemblyResultSet runLocalAssembly(ActiveRegion activeRegion, Haplotype haplotype, byte[] bArr, GenomeLoc genomeLoc, List<VariantContext> list, ReadErrorCorrector readErrorCorrector) {
        if (activeRegion == null) {
            throw new IllegalArgumentException("Assembly engine cannot be used with a null ActiveRegion.");
        }
        if (activeRegion.getExtendedLoc() == null) {
            throw new IllegalArgumentException("Active region must have an extended location.");
        }
        if (haplotype == null) {
            throw new IllegalArgumentException("Reference haplotype cannot be null.");
        }
        // Fail with the same exception type as the sibling checks instead of a bare NPE below.
        if (bArr == null || genomeLoc == null) {
            throw new IllegalArgumentException("Reference bases and reference loc cannot be null.");
        }
        if (bArr.length != genomeLoc.size()) {
            throw new IllegalArgumentException("Reference bases and reference loc must be the same size.");
        }
        if (this.pruneFactor < 0) {
            throw new IllegalArgumentException("Pruning factor cannot be negative");
        }

        final List<Haplotype> givenHaplotypes = GenotypingGivenAllelesUtils.composeGivenHaplotypes(haplotype, list, activeRegion.getExtendedLoc());

        final List<GATKSAMRecord> reads;
        if (readErrorCorrector != null) {
            readErrorCorrector.addReadsToKmers(activeRegion.getReads());
            reads = new ArrayList<GATKSAMRecord>(readErrorCorrector.correctReads(activeRegion.getReads()));
        } else {
            reads = activeRegion.getReads();
        }

        final AssemblyResultSet resultSet = new AssemblyResultSet();
        resultSet.setRegionForGenotyping(activeRegion);
        resultSet.setFullReferenceWithPadding(bArr);
        resultSet.setPaddedReferenceLoc(genomeLoc);

        final GenomeLoc extendedLoc = activeRegion.getExtendedLoc();
        haplotype.setGenomeLocation(extendedLoc);
        resultSet.add(haplotype);

        // Keep only graphs that assembled some variation, remembering which result produced each.
        final List<SeqGraph> nonRefGraphs = new LinkedList<SeqGraph>();
        final Map<SeqGraph, AssemblyResult> resultByGraph = new HashMap<SeqGraph, AssemblyResult>();
        for (final AssemblyResult result : assemble(reads, haplotype, givenHaplotypes)) {
            if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
                sanityCheckGraph(result.getGraph(), haplotype);
                resultByGraph.put(result.getGraph(), result);
                nonRefGraphs.add(result.getGraph());
            }
        }

        // The haplotypes are recorded on resultSet; the returned list is not needed here.
        findBestPaths(nonRefGraphs, haplotype, genomeLoc, extendedLoc, resultByGraph, resultSet);

        if (this.graphWriter != null) {
            printGraphs(nonRefGraphs);
        }
        return resultSet;
    }

    /**
     * Extracts the best haplotypes from each graph, aligns them to the reference haplotype and
     * registers the accepted ones on {@code assemblyResultSet}.
     *
     * <p>A candidate is skipped when it was already found, when no cigar could be computed for it
     * (counted and reported at debug level), when its cigar contains an {@code N} operator, or when
     * its cigar spans fewer than {@link #MIN_HAPLOTYPE_REFERENCE_LENGTH} reference bases. If the
     * reference haplotype was not among the accepted candidates it is appended, scored by the first
     * finder that yields a non-NaN score (NaN if none does).
     *
     * @param list              the graphs to search; each must have a reference source and sink vertex
     * @param haplotype         the reference haplotype
     * @param genomeLoc         reference location, used only in the alignment-failure debug message
     * @param genomeLoc2        genome location assigned to every accepted haplotype
     * @param map               lookup from graph to the assembly result that produced it
     * @param assemblyResultSet receives every accepted haplotype together with its assembly result
     * @return the accepted haplotypes in discovery order, always containing {@code haplotype}
     * @throws IllegalArgumentException if a graph lacks a reference source or sink vertex
     * @throws IllegalStateException    if an alignment yields an empty cigar, or an accepted cigar's
     *                                  reference length differs from the reference haplotype's
     */
    @Ensures({"result.contains(haplotype)"})
    protected List<Haplotype> findBestPaths(List<SeqGraph> list, Haplotype haplotype, GenomeLoc genomeLoc, GenomeLoc genomeLoc2, Map<SeqGraph, AssemblyResult> map, AssemblyResultSet assemblyResultSet) {
        final LinkedHashSet<Haplotype> found = new LinkedHashSet<Haplotype>();
        final int refAlignmentStart = haplotype.getAlignmentStartHapwrtRef();
        final List<KBestHaplotypeFinder> finders = new ArrayList<KBestHaplotypeFinder>(list.size());
        int failedCigars = 0;

        for (final SeqGraph graph : list) {
            final SeqVertex source = graph.getReferenceSourceVertex();
            final SeqVertex sink = graph.getReferenceSinkVertex();
            if (source == null || sink == null) {
                throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph " + graph);
            }

            final KBestHaplotypeFinder finder = new KBestHaplotypeFinder(graph, source, sink);
            finders.add(finder);

            final Iterator<KBestHaplotype> bestPaths = finder.iterator(this.numBestHaplotypesPerGraph);
            while (bestPaths.hasNext()) {
                final Haplotype candidate = bestPaths.next().haplotype();
                if (found.contains(candidate)) {
                    continue;
                }

                final Cigar cigar = CigarUtils.calculateCigar(haplotype.getBases(), candidate.getBases());
                if (cigar == null) {
                    // Could not be aligned back to the reference; counted and reported below.
                    failedCigars++;
                    continue;
                }
                if (cigar.isEmpty()) {
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + haplotype.getCigar().getReferenceLength());
                }
                if (pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH) {
                    continue;
                }
                if (cigar.getReferenceLength() != haplotype.getCigar().getReferenceLength()) {
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + haplotype.getCigar().getReferenceLength() + " ref = " + haplotype + " path " + new String(candidate.getBases()));
                }

                candidate.setCigar(cigar);
                candidate.setAlignmentStartHapwrtRef(refAlignmentStart);
                candidate.setGenomeLocation(genomeLoc2);
                found.add(candidate);
                assemblyResultSet.add(candidate, map.get(graph));
                if (this.debug) {
                    logger.info("Adding haplotype " + candidate.getCigar() + " from graph with kmer " + graph.getKmerSize());
                }
            }
        }

        // Guarantee the reference haplotype is part of the result.
        if (!found.contains(haplotype)) {
            haplotype.setScore(firstKnownScore(finders, haplotype));
            found.add(haplotype);
        }

        if (failedCigars != 0) {
            logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.", failedCigars, genomeLoc.toString()));
        }

        if (this.debug) {
            if (found.size() > 1) {
                logger.info("Found " + found.size() + " candidate haplotypes of " + found.size() + " possible combinations to evaluate every read against.");
            } else {
                logger.info("Found only the reference haplotype in the assembly graph.");
            }
            for (final Haplotype h : found) {
                logger.info(h.toString());
                logger.info("> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
            }
        }
        return new ArrayList<Haplotype>(found);
    }

    /**
     * Returns the score of {@code haplotype} from the first finder that reports a non-NaN value,
     * or NaN when no finder does (including when {@code finders} is empty).
     */
    private static double firstKnownScore(List<KBestHaplotypeFinder> finders, Haplotype haplotype) {
        for (final KBestHaplotypeFinder finder : finders) {
            final double score = finder.score(haplotype);
            if (!Double.isNaN(score)) {
                return score;
            }
        }
        return Double.NaN;
    }

    /**
     * Reports whether a path's alignment is considered too divergent from the reference, which
     * here means its cigar contains at least one {@code N} operator.
     *
     * @param cigar the alignment of the path against the reference; must be non-null
     * @return {@code true} if any element of {@code cigar} has operator {@code N}
     */
    @Requires({"cigar != null"})
    private boolean pathIsTooDivergentFromReference(Cigar cigar) {
        for (final CigarElement element : cigar.getCigarElements()) {
            if (element.getOperator() == CigarOperator.N) {
                return true;
            }
        }
        return false;
    }

    /**
     * Writes {@code baseGraph} to {@code file} when {@link #debugGraphTransformations} is enabled;
     * otherwise does nothing.
     *
     * @param baseGraph the graph to print
     * @param file      the destination file
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public void printDebugGraphTransform(BaseGraph baseGraph, File file) {
        if (this.debugGraphTransformations) {
            baseGraph.printGraph(file, this.pruneFactor);
        }
    }

    /**
     * Cleans up a sequence graph in place and classifies the outcome.
     *
     * <p>The steps run in a fixed order (zip linear chains, remove orphan and unconnected vertices,
     * simplify), with a debug dump after each stage when {@link #debugGraphTransformations} is on.
     *
     * @param seqGraph the graph to clean; it is modified
     * @return a {@code JUST_ASSEMBLED_REFERENCE} result if the graph lacks a reference source or
     *         sink vertex after the first simplification, otherwise an
     *         {@code ASSEMBLED_SOME_VARIATION} result
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public AssemblyResult cleanupSeqGraph(SeqGraph seqGraph) {
        printDebugGraphTransform(seqGraph, new File("sequenceGraph.1.dot"));

        seqGraph.zipLinearChains();
        printDebugGraphTransform(seqGraph, new File("sequenceGraph.2.zipped.dot"));

        seqGraph.removeSingletonOrphanVertices();
        seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection();
        printDebugGraphTransform(seqGraph, new File("sequenceGraph.3.pruned.dot"));

        seqGraph.simplifyGraph();
        printDebugGraphTransform(seqGraph, new File("sequenceGraph.4.merged.dot"));

        // Bail out before removePathsNotConnectedToRef when there is no reference source/sink.
        if (seqGraph.getReferenceSourceVertex() == null || seqGraph.getReferenceSinkVertex() == null) {
            return new AssemblyResult(AssemblyResult.Status.JUST_ASSEMBLED_REFERENCE, seqGraph);
        }

        seqGraph.removePathsNotConnectedToRef();
        seqGraph.simplifyGraph();

        if (seqGraph.vertexSet().size() == 1) {
            // A single-vertex graph gets an empty-sequence vertex appended so that it has an edge.
            // NOTE(review): BaseEdge(true, 0) is presumably (isRef, multiplicity) - confirm.
            final SeqVertex onlyVertex = (SeqVertex) seqGraph.vertexSet().iterator().next();
            final SeqVertex emptyVertex = new SeqVertex("");
            seqGraph.addVertex(emptyVertex);
            seqGraph.addEdge(onlyVertex, emptyVertex, new BaseEdge(true, 0));
        }

        printDebugGraphTransform(seqGraph, new File("sequenceGraph.5.final.dot"));
        return new AssemblyResult(AssemblyResult.Status.ASSEMBLED_SOME_VARIATION, seqGraph);
    }

    /**
     * Runs all graph sanity checks; currently this is only the reference-path check.
     *
     * @throws IllegalStateException if a check fails
     */
    private <T extends BaseVertex, E extends BaseEdge> void sanityCheckGraph(BaseGraph<T, E> baseGraph, Haplotype haplotype) {
        sanityCheckReferenceGraph(baseGraph, haplotype);
    }

    /**
     * Verifies that the graph has a reference source and sink vertex and that the reference bytes
     * between them equal the bases of the reference haplotype.
     *
     * @throws IllegalStateException if either vertex is missing or the bases differ
     */
    private <T extends BaseVertex, E extends BaseEdge> void sanityCheckReferenceGraph(BaseGraph<T, E> baseGraph, Haplotype haplotype) {
        if (baseGraph.getReferenceSourceVertex() == null) {
            throw new IllegalStateException("All reference graphs must have a reference source vertex.");
        }
        if (baseGraph.getReferenceSinkVertex() == null) {
            throw new IllegalStateException("All reference graphs must have a reference sink vertex.");
        }
        // Computed once and reused for both the comparison and the failure message.
        final byte[] graphRefBytes = baseGraph.getReferenceBytes(baseGraph.getReferenceSourceVertex(), baseGraph.getReferenceSinkVertex(), true, true);
        if (!Arrays.equals(graphRefBytes, haplotype.getBases())) {
            throw new IllegalStateException("Mismatch between the reference haplotype and the reference assembly graph path. for graph " + baseGraph + " graph = " + new String(graphRefBytes) + " haplotype = " + new String(haplotype.getBases()));
        }
    }

    /**
     * Writes the graphs to {@link #graphWriter} wrapped in a {@code digraph assemblyGraphs} block.
     *
     * <p>When {@link #debugGraphTransformations} is on, graphs with a kmer size of
     * {@link #KMER_SIZE_PRINT_CUTOFF_WHEN_DEBUGGING} or more are skipped and only the first
     * remaining graph is written. Callers must ensure {@code graphWriter} is non-null.
     */
    private void printGraphs(List<SeqGraph> list) {
        this.graphWriter.println("digraph assemblyGraphs {");
        for (final SeqGraph graph : list) {
            if (this.debugGraphTransformations && graph.getKmerSize() >= KMER_SIZE_PRINT_CUTOFF_WHEN_DEBUGGING) {
                logger.info("Skipping writing of graph with kmersize " + graph.getKmerSize());
                continue;
            }
            graph.printGraph(this.graphWriter, false, this.pruneFactor);
            if (this.debugGraphTransformations) {
                break;
            }
        }
        this.graphWriter.println("}");
    }

    /** @return the current prune factor */
    public int getPruneFactor() {
        return this.pruneFactor;
    }

    /**
     * Sets the prune factor. No validation happens here; a negative value is rejected later by
     * {@link #runLocalAssembly}.
     */
    public void setPruneFactor(int i) {
        this.pruneFactor = i;
    }

    /** @return the value of the {@code errorCorrectKmers} flag */
    public boolean shouldErrorCorrectKmers() {
        return this.errorCorrectKmers;
    }

    /** Sets the {@code errorCorrectKmers} flag. */
    public void setErrorCorrectKmers(boolean z) {
        this.errorCorrectKmers = z;
    }

    /** Sets the stream that assembly graphs are printed to; {@code null} disables printing. */
    public void setGraphWriter(PrintStream printStream) {
        this.graphWriter = printStream;
    }

    /** @return the value of the {@code minBaseQualityToUseInAssembly} field */
    public byte getMinBaseQualityToUseInAssembly() {
        return this.minBaseQualityToUseInAssembly;
    }

    /** Sets the {@code minBaseQualityToUseInAssembly} field. */
    public void setMinBaseQualityToUseInAssembly(byte b) {
        this.minBaseQualityToUseInAssembly = b;
    }

    /** @return whether debug logging is enabled */
    public boolean isDebug() {
        return this.debug;
    }

    /** Enables or disables debug logging. */
    public void setDebug(boolean z) {
        this.debug = z;
    }

    /** @return the value of the {@code allowCyclesInKmerGraphToGeneratePaths} flag */
    public boolean isAllowCyclesInKmerGraphToGeneratePaths() {
        return this.allowCyclesInKmerGraphToGeneratePaths;
    }

    /** Sets the {@code allowCyclesInKmerGraphToGeneratePaths} flag. */
    public void setAllowCyclesInKmerGraphToGeneratePaths(boolean z) {
        this.allowCyclesInKmerGraphToGeneratePaths = z;
    }

    /** @return whether intermediate graphs are dumped during graph transformations */
    public boolean isDebugGraphTransformations() {
        return this.debugGraphTransformations;
    }

    /** Enables or disables dumping of intermediate graphs during graph transformations. */
    public void setDebugGraphTransformations(boolean z) {
        this.debugGraphTransformations = z;
    }

    /** @return the value of the {@code recoverDanglingTails} flag */
    public boolean isRecoverDanglingTails() {
        return this.recoverDanglingTails;
    }

    /** Sets the {@code recoverDanglingTails} flag. */
    public void setRecoverDanglingTails(boolean z) {
        this.recoverDanglingTails = z;
    }

    /** @return the value of the {@code recoverDanglingHeads} flag */
    public boolean isRecoverDanglingHeads() {
        return this.recoverDanglingHeads;
    }

    /** Sets the {@code recoverDanglingHeads} flag. */
    public void setRecoverDanglingHeads(boolean z) {
        this.recoverDanglingHeads = z;
    }
}
