package org.broadinstitute.gatk.tools.walkers.beagle;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.tools.bzip2.BZip2Constants;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.samples.Gender;
import org.broadinstitute.gatk.engine.walkers.RodWalker;
import org.broadinstitute.gatk.tools.walkers.variantrecalibration.VQSRCalibrationCurve;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.codecs.hapmap.RawHapMapFeature;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Hidden;
import org.broadinstitute.gatk.utils.commandline.Input;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.utils.exceptions.GATKException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;

@DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class})
/* loaded from: input_file:org/broadinstitute/gatk/tools/walkers/beagle/ProduceBeagleInput.class */
public class ProduceBeagleInput extends RodWalker<Integer, Integer> {

    /** Optional validation track; {@code map} reads its record at each locus next to the primary variant record. */
    @Hidden
    @Input(fullName = "validation", shortName = "validation", doc = "Validation VCF file", required = false)
    public RodBinding<VariantContext> validation;

    // NOTE(review): the 0.0 literals and the 100000 cache size in this field block used to be spelled through
    // unrelated library constants (StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION and
    // BZip2Constants.baseBlockSize). They are written as plain literals here so this tool no longer depends on
    // those constants keeping their values -- confirm the values against the constants when reviewing.

    /** Log10 of {0.5, 0, 0.5}: the flat vector used for male samples when {@link #CHECK_IS_MALE_ON_CHR_X} is set. */
    private static final double[] HAPLOID_FLAT_LOG10_LIKELIHOODS = MathUtils.toLog10(new double[]{0.5d, 0.0d, 0.5d});

    /** Log10 of {0.33, 0.33, 0.33}: the flat vector used for every other sample. */
    private static final double[] DIPLOID_FLAT_LOG10_LIKELIHOODS = MathUtils.toLog10(new double[]{0.33d, 0.33d, 0.33d});

    /** Primary variant input; supplies the sample list and the records converted to Beagle likelihoods. */
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

    /** Destination of the Beagle likelihoods file (header line plus one line per emitted site). */
    @Output(doc = "File to which BEAGLE input should be written")
    protected PrintStream beagleWriter = null;

    /** Optional destination of the markers file; nothing is written when this is {@code null}. */
    @Output(doc = "File to which BEAGLE markers should be written", shortName = "markers", fullName = "markers", required = false, defaultToStdout = false)
    @Hidden
    protected PrintStream markers = null;

    /** Running index written next to each marker name; starts at 1 and advances once per marker line. */
    int markerCounter = 1;

    /** Optional calibration file; when set, {@code initialize} loads it into {@link #VQSRCalibrator}. */
    @Hidden
    @Input(doc = "VQSqual calibration file", shortName = "cc", required = false)
    protected File VQSRCalibrationFile = null;

    /** Calibration curve read from {@link #VQSRCalibrationFile}, or {@code null} when no file was given. */
    protected VQSRCalibrationCurve VQSRCalibrator = null;

    /** Key handed to the calibration curve together with each record. */
    @Hidden
    @Argument(doc = "VQSqual key", shortName = "vqskey", required = false)
    protected String VQSLOD_KEY = "VQSqual";

    /** Threshold compared against a random draw to decide whether a genotype's likelihoods are replaced by a flat vector. */
    @Hidden
    @Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false)
    public double insertedNoCallRate = 0.0d;

    /** Prior passed to {@code writeBeagleOutput} when a validation record is emitted. */
    @Hidden
    @Argument(fullName = "validation_genotype_ptrue", shortName = "valp", doc = "Flat probability to assign to validation genotypes. Will override GL field.", required = false)
    public double validationPrior = -1.0d;

    /** Target fraction of usable validation records that {@code useValidation} assigns to the bootstrap set. */
    @Hidden
    @Argument(fullName = "validation_bootstrap", shortName = "bs", doc = "Proportion of records to be used in bootstrap set", required = false)
    public double bootstrap = 0.0d;

    /** Optional VCF writer receiving validation records, with bootstrap records marked by the "bootstrap" filter. */
    @Hidden
    @Argument(fullName = "bootstrap_vcf", shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
    VariantContextWriter bootstrapVCFOutput = null;

    /** When true, samples whose gender is MALE get their middle (heterozygous) likelihood suppressed. */
    @Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
    public boolean CHECK_IS_MALE_ON_CHR_X = false;

    /** Prior passed to {@code writeBeagleOutput} when a primary variant record is emitted. */
    @Hidden
    @Argument(fullName = "variant_genotype_ptrue", shortName = "varp", doc = "Flat probability prior to assign to variant (not validation) genotypes. Does not override GL field.", required = false)
    public double variantPrior = 0.96d;

    /** Sample names, in the order their columns appear in the Beagle file; populated by {@code initialize}. */
    private Set<String> samples = null;

    /** Filter set applied to validation records that were chosen for the bootstrap set. */
    private final Set<String> BOOTSTRAP_FILTER = new HashSet<>(Arrays.asList("bootstrap"));

    /** Number of validation records assigned to the bootstrap set so far. */
    private int bootstrapSetSize = 0;

    /** Number of usable validation records left out of the bootstrap set so far. */
    private int testSetSize = 0;

    /** Formats each probability as "%5.4f " and remembers up to 100000 formatted values. */
    private final CachingFormatter formatter = new CachingFormatter("%5.4f ", 100000);

    /** Number of times {@code goodSite} rejected a record as a certain false positive. */
    private int certainFPs = 0;

    /**
     * Formats doubles with a fixed format string and remembers the resulting text per value in a bounded
     * {@link LRUCache}, so a value that was formatted recently is not formatted again.
     */
    public static class CachingFormatter {
        private final String format;
        private final LRUCache<Double, String> cache;

        /**
         * @param str format string handed to {@link String#format(Locale, String, Object...)}
         * @param i   maximum number of formatted values kept in the cache
         */
        public CachingFormatter(String str, int i) {
            this.format = str;
            this.cache = new LRUCache<>(i);
        }

        /** @return the format string this formatter was created with */
        public String getFormat() {
            return this.format;
        }

        /**
         * Returns the formatted text for {@code d}, formatting it only if it is not already cached.
         *
         * @param d value to format
         * @return the text produced by the format string for {@code d}
         */
        public String format(double d) {
            final Double key = d;
            String text = this.cache.get(key);
            if (text == null) {
                // Locale.ROOT pins the decimal separator to '.'; without it the output follows the JVM's
                // default locale and can contain a decimal comma.
                text = String.format(Locale.ROOT, this.format, key);
                this.cache.put(key, text);
            }
            return text;
        }
    }

    /**
     * Bounded least-recently-used cache built on an access-ordered {@link LinkedHashMap}. Once the number of
     * entries exceeds the configured size, the eldest (least recently accessed) entry is dropped on insertion.
     * Every public method is {@code synchronized} on the cache instance.
     *
     * @param <K> key type
     * @param <V> value type
     */
    public static class LRUCache<K, V> {
        private static final float hashTableLoadFactor = 0.75f;
        private final LinkedHashMap<K, V> map;
        private final int cacheSize;

        /**
         * @param i maximum number of entries retained by the cache
         */
        public LRUCache(int i) {
            this.cacheSize = i;
            // Size the table so that cacheSize entries fit without exceeding the load factor.
            final int initialCapacity = ((int) Math.ceil(i / hashTableLoadFactor)) + 1;
            // Third argument 'true' selects access order, which is what turns the map into an LRU structure.
            this.map = new LinkedHashMap<K, V>(initialCapacity, hashTableLoadFactor, true) {
                private static final long serialVersionUID = 1;

                @Override
                protected boolean removeEldestEntry(Map.Entry<K, V> entry) {
                    return size() > LRUCache.this.cacheSize;
                }
            };
        }

        /**
         * @param k key to look up; a hit also marks the entry as most recently used
         * @return the cached value, or {@code null} when the key is absent
         */
        public synchronized V get(K k) {
            return this.map.get(k);
        }

        /**
         * Stores a value, evicting the least recently used entry if the cache is over its size.
         *
         * @param k key
         * @param v value
         */
        public synchronized void put(K k, V v) {
            this.map.put(k, v);
        }

        /** Removes every entry. */
        public synchronized void clear() {
            this.map.clear();
        }

        /** @return the number of entries currently held */
        public synchronized int usedEntries() {
            return this.map.size();
        }

        /** @return a new list holding the current entries; later cache changes do not alter the list's contents */
        public synchronized Collection<Map.Entry<K, V>> getAll() {
            return new ArrayList<>(this.map.entrySet());
        }
    }

    /**
     * Resolves the sample list from the primary variant track, writes the Beagle header line, and sets up the
     * optional bootstrap VCF writer and calibration curve.
     */
    @Override
    public void initialize() {
        final List<String> variantRodNames = Arrays.asList(this.variantCollection.variants.getName());
        this.samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), variantRodNames);

        // Header: the three fixed columns, then each sample name repeated three times.
        final StringBuilder header = new StringBuilder("marker alleleA alleleB");
        for (final String sampleName : this.samples) {
            header.append(' ').append(sampleName)
                  .append(' ').append(sampleName)
                  .append(' ').append(sampleName);
        }
        this.beagleWriter.println(header.toString());

        if (this.bootstrapVCFOutput != null) {
            initializeVcfWriter();
        }

        if (this.VQSRCalibrationFile == null) {
            return;
        }
        this.VQSRCalibrator = VQSRCalibrationCurve.readFromFile(this.VQSRCalibrationFile);
        logger.info("Read calibration curve");
        this.VQSRCalibrator.printInfo(logger);
    }

    /**
     * Emits at most one Beagle line for the current locus, taken either from the validation record (when it is
     * selected for bootstrapping) or from the primary variant record.
     *
     * @param refMetaDataTracker tracker for the locus; a {@code null} tracker yields 0
     * @param referenceContext   reference context, forwarded to {@link #useValidation}
     * @param alignmentContext   supplies the locus location used to look up both records
     * @return 1 if a line was written, otherwise 0
     */
    @Override
    public Integer map(RefMetaDataTracker refMetaDataTracker, ReferenceContext referenceContext, AlignmentContext alignmentContext) {
        if (refMetaDataTracker == null) {
            return 0;
        }

        final GenomeLoc locus = alignmentContext.getLocation();
        final VariantContext variantRecord = (VariantContext) refMetaDataTracker.getFirstValue(this.variantCollection.variants, locus);
        final VariantContext validationRecord = (VariantContext) refMetaDataTracker.getFirstValue(this.validation, locus);

        int written = 0;
        if (goodSite(variantRecord, validationRecord)) {
            if (useValidation(validationRecord, referenceContext)) {
                // Validation record wins; the variant record only fills in samples it lacks.
                writeBeagleOutput(validationRecord, variantRecord, true, this.validationPrior);
                written = 1;
            } else if (goodSite(variantRecord)) {
                writeBeagleOutput(variantRecord, validationRecord, false, this.variantPrior);
                written = 1;
            }
        }
        return written;
    }

    /**
     * Tells whether at least one of two records is usable. The second record is only examined when the first
     * one is not usable.
     *
     * @param variantContext  first record to check (may be {@code null})
     * @param variantContext2 second record to check (may be {@code null})
     * @return {@code true} if either record passes {@link #goodSite(VariantContext)}
     */
    public boolean goodSite(VariantContext variantContext, VariantContext variantContext2) {
        if (goodSite(variantContext)) {
            return true;
        }
        return goodSite(variantContext2);
    }

    /**
     * Tells whether a record may be used: it must pass {@link #canBeOutputToBeagle} and, when a calibration
     * curve is loaded, must not be flagged by it as a certain false positive.
     * <p>
     * Side effect: each call that rejects a record as a certain false positive increments {@code certainFPs},
     * so calling this repeatedly on the same record counts it repeatedly.
     *
     * @param variantContext record to check (may be {@code null})
     * @return {@code true} if the record is usable
     */
    public boolean goodSite(VariantContext variantContext) {
        if (!canBeOutputToBeagle(variantContext)) {
            return false;
        }
        final boolean certainFalsePositive = this.VQSRCalibrator != null
                && this.VQSRCalibrator.certainFalsePositive(this.VQSLOD_KEY, variantContext);
        if (certainFalsePositive) {
            this.certainFPs++;
        }
        return !certainFalsePositive;
    }

    /**
     * Structural eligibility check for a record.
     *
     * @param variantContext record to check (may be {@code null})
     * @return {@code true} only for a non-null, unfiltered, biallelic record that has genotypes
     */
    public static boolean canBeOutputToBeagle(VariantContext variantContext) {
        if (variantContext == null || variantContext.isFiltered()) {
            return false;
        }
        return variantContext.isBiallelic() && variantContext.hasGenotypes();
    }

    /**
     * Decides whether a validation record joins the bootstrap set, and mirrors the record to the bootstrap VCF
     * writer when one is configured.
     * <p>
     * An unusable record is never selected; if it is non-null it is still written unchanged to the bootstrap
     * VCF. A usable record is selected when adding it would keep the bootstrap fraction at or below
     * {@code bootstrap}; selected records are written with the "bootstrap" filter, the others unchanged.
     *
     * @param variantContext   validation record at the current locus (may be {@code null})
     * @param referenceContext not used by this method
     * @return {@code true} if the record was assigned to the bootstrap set
     */
    public boolean useValidation(VariantContext variantContext, ReferenceContext referenceContext) {
        final boolean hasVcfOutput = this.bootstrapVCFOutput != null;

        if (!goodSite(variantContext)) {
            if (variantContext != null && hasVcfOutput) {
                this.bootstrapVCFOutput.add(variantContext);
            }
            return false;
        }

        logger.debug(String.format("boot: %d, test: %d, total: %d", this.bootstrapSetSize, this.testSetSize, this.bootstrapSetSize + this.testSetSize + 1));

        // Fraction the bootstrap set would have if this record were added to it.
        final double fractionIfSelected = (this.bootstrapSetSize + 1.0d) / ((1.0d + this.bootstrapSetSize) + this.testSetSize);
        final boolean selected = fractionIfSelected <= this.bootstrap;

        if (hasVcfOutput) {
            final VariantContext toWrite = selected
                    ? new VariantContextBuilder(variantContext).filters(this.BOOTSTRAP_FILTER).make()
                    : variantContext;
            this.bootstrapVCFOutput.add(toWrite);
        }

        if (selected) {
            this.bootstrapSetSize++;
        } else {
            this.testSetSize++;
        }
        return selected;
    }

    /**
     * Writes one line of the Beagle likelihoods file (and, if configured, one line of the markers file) for a
     * site.
     * <p>
     * The line starts with the marker name {@code contig:start} and the site's alleles, followed by three
     * normalised probabilities per sample. For each sample the genotype is taken from {@code variantContext}
     * if present there, otherwise from {@code variantContext2} (only when that record passes
     * {@link #goodSite(VariantContext)}). The log10 likelihood vector is chosen as follows:
     * <ul>
     *   <li>genotype has likelihoods: those likelihoods, randomly replaced by a flat vector at rate
     *       {@code insertedNoCallRate};</li>
     *   <li>no likelihoods, genotype is called and does not come from the validation record: {@code d} for
     *       the called genotype class and {@code (1 - d) / 2} for the other two;</li>
     *   <li>anything else: a flat vector.</li>
     * </ul>
     * For male samples with {@code CHECK_IS_MALE_ON_CHR_X} set, the middle (heterozygous) entry is suppressed.
     * <p>
     * Changes relative to the previous implementation: a validation genotype without likelihoods combined
     * with a negative prior now gets a flat vector instead of failing with a {@code NullPointerException};
     * the male-on-chrX adjustment works on a copy so the shared flat-likelihood constants are never modified;
     * and the no-call masking test is strict, so a rate of 0 can never mask a genotype.
     *
     * @param variantContext  record whose alleles and genotypes are written preferentially
     * @param variantContext2 fallback record for samples missing from {@code variantContext} (may be {@code null})
     * @param z               {@code true} when {@code variantContext} is the validation record
     * @param d               prior probability for called genotypes that carry no likelihoods; only consulted
     *                        for genotypes that do not come from the validation record
     * @throws GATKException if a sample has a genotype in neither record
     */
    public void writeBeagleOutput(VariantContext variantContext, VariantContext variantContext2, boolean z, double d) {
        // NOTE(review): "-", "\n" and 0.0 below were previously spelled through unrelated library constants
        // (RawHapMapFeature.NULL_ALLELE_STRING, IOUtils.LINE_SEPARATOR_UNIX,
        // StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION); confirm the values match.
        final GenomeLoc location = GATKVariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), variantContext);
        final StringBuffer line = new StringBuffer();

        final String markerName = String.format("%s:%d ", location.getContig(), location.getStart());
        line.append(markerName);
        if (this.markers != null) {
            this.markers.append(markerName).append("\t").append(Integer.toString(this.markerCounter++)).append("\t");
        }

        for (final Allele allele : variantContext.getAlleles()) {
            final String alleleText = allele.isNoCall() ? "-" : allele.getBaseString();
            line.append(String.format("%s ", alleleText));
            if (this.markers != null) {
                this.markers.append(alleleText).append("\t");
            }
        }
        if (this.markers != null) {
            this.markers.append("\n");
        }

        final GenotypesContext preferredGenotypes = variantContext.getGenotypes();
        final GenotypesContext fallbackGenotypes = goodSite(variantContext2) ? variantContext2.getGenotypes() : null;

        for (final String sample : this.samples) {
            final boolean isMaleOnChrX = this.CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;

            final Genotype genotype;
            final boolean fromValidation;
            if (preferredGenotypes.containsSample(sample)) {
                genotype = preferredGenotypes.get(sample);
                fromValidation = z;
            } else if (fallbackGenotypes != null && fallbackGenotypes.containsSample(sample)) {
                genotype = fallbackGenotypes.get(sample);
                fromValidation = !z;
            } else {
                throw new GATKException("Sample " + sample + " arose with no genotype in variant or validation VCF. This should never happen.");
            }

            double[] log10Likelihoods;
            if (genotype.hasLikelihoods()) {
                log10Likelihoods = genotype.getLikelihoods().getAsVector();
                // Strict comparison: the random draw can be exactly 0.0, which must not mask at rate 0.
                if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < this.insertedNoCallRate) {
                    log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
                }
                if (isMaleOnChrX) {
                    // Copy first: the vector may be one of the shared static flat arrays.
                    // Assumes a three-entry vector whose middle entry is the heterozygous class -- TODO confirm
                    // for genotypes that are not diploid.
                    log10Likelihoods = log10Likelihoods.clone();
                    log10Likelihoods[1] = -255.0d;
                }
            } else if (!fromValidation && genotype.isCalled()) {
                // No likelihoods available: trust the call with probability d and split the rest evenly.
                final double remainder = (1.0d - d) / 2.0d;
                double homRef = remainder;
                double het = remainder;
                double homVar = remainder;
                if (genotype.isHomRef()) {
                    homRef = d;
                } else if (genotype.isHet()) {
                    het = d;
                } else if (genotype.isHomVar()) {
                    homVar = d;
                }
                log10Likelihoods = MathUtils.toLog10(new double[]{homRef, isMaleOnChrX ? 0.0d : het, homVar});
            } else {
                // Validation genotype without likelihoods, or an uncalled genotype: no information.
                log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
            }

            writeSampleLikelihoods(line, variantContext, log10Likelihoods);
        }

        this.beagleWriter.println(line.toString());
    }

    /**
     * Appends one sample's probabilities to the output line. When a calibration curve is loaded, the log10
     * likelihoods are first passed through it; the result is then normalised out of log10 space and each
     * value is appended using the cached formatter.
     *
     * @param stringBuffer   line being built
     * @param variantContext record the likelihoods belong to (handed to the calibration curve)
     * @param dArr           log10 likelihoods for the sample
     */
    private void writeSampleLikelihoods(StringBuffer stringBuffer, VariantContext variantContext, double[] dArr) {
        final double[] log10Likelihoods = this.VQSRCalibrator == null
                ? dArr
                : this.VQSRCalibrator.includeErrorRateInLikelihoods(this.VQSLOD_KEY, variantContext, dArr);
        final double[] probabilities = MathUtils.normalizeFromLog10(log10Likelihoods);
        for (int idx = 0; idx < probabilities.length; idx++) {
            stringBuffer.append(this.formatter.format(probabilities[idx]));
        }
    }

    /** @return the initial site count for the reduction, zero */
    @Override
    public Integer reduceInit() {
        return Integer.valueOf(0);
    }

    /**
     * Adds two partial site counts.
     *
     * @param num  first count
     * @param num2 second count
     * @return the sum of both counts
     */
    @Override
    public Integer reduce(Integer num, Integer num2) {
        final int total = num + num2;
        return total;
    }

    /**
     * Logs the number of sites written and the certain-false-positive count, the latter also as a percentage
     * of (certain false positives + sites written).
     *
     * @param num total number of sites written to the Beagle file
     */
    @Override
    public void onTraversalDone(Integer num) {
        logger.info("Sites included in beagle likelihoods file             : " + num);

        // The denominator is clamped to 1 so an empty run reports 0% instead of dividing by zero.
        final int denominator = Math.max(this.certainFPs + num, 1);
        final double percentage = (100.0d * this.certainFPs) / denominator;
        logger.info(String.format("Certain false positive found from recalibration curve : %d (%.2f%%)", this.certainFPs, percentage));
    }

    /**
     * Writes the header of the bootstrap VCF: the header fields and samples of the validation track, plus a
     * definition of the "bootstrap" filter.
     */
    private void initializeVcfWriter() {
        final List<String> validationRodNames = Arrays.asList(this.validation.getName());

        // Left raw on purpose: the header-line element type is not imported in this file.
        final HashSet headerLines = new HashSet(GATKVCFUtils.getHeaderFields(getToolkit(), validationRodNames));
        headerLines.add(new VCFFilterHeaderLine("bootstrap", "This site used for genotype bootstrapping with ProduceBeagleInputWalker"));

        final VCFHeader header = new VCFHeader(headerLines, SampleUtils.getUniqueSamplesFromRods(getToolkit(), validationRodNames));
        this.bootstrapVCFOutput.writeHeader(header);
    }
}
