package org.broadinstitute.gatk.engine.filters;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTagUtil;
import java.util.Iterator;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.ReadProperties;
import org.broadinstitute.gatk.engine.arguments.ValidationExclusion;
import org.broadinstitute.gatk.engine.datasources.reads.SAMDataSource;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.exceptions.UserException;

/* loaded from: input_file:org/broadinstitute/gatk/engine/filters/MalformedReadFilter.class */
/**
 * Read filter that rejects reads failing a fixed series of well-formedness checks.
 *
 * <p>Each private {@code checkXxx} helper returns {@code true} when the read <em>passes</em>
 * that check (note that several helper names describe the failure condition, not the
 * success condition). Some checks throw a {@link UserException} instead of returning
 * {@code false} unless the corresponding command-line argument asks for the read to be
 * filtered out silently.
 */
public class MalformedReadFilter extends ReadFilter {
    private static final String FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME = "filter_reads_with_N_cigar";

    /** Header obtained from the engine in {@link #initialize(GenomeAnalysisEngine)}. */
    private SAMFileHeader header;

    @Argument(fullName = FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME, shortName = "filterRNC", doc = "filter out reads with CIGAR containing the N operator, instead of stop processing and report an error.", required = false)
    boolean filterReadsWithNCigar = false;

    @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
    boolean filterMismatchingBaseAndQuals = false;

    @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "if a read has no stored bases (i.e. a '*'), filter out the read instead of blowing up.", required = false)
    boolean filterBasesNotStored = false;

    /** True when the engine's validation exclusions contain {@code ALLOW_N_CIGAR_READS}. */
    private boolean allowNCigars;

    /**
     * Captures the SAM header and works out whether reads with the N CIGAR operator are
     * explicitly allowed by the engine's validation-exclusion list.
     *
     * @param engine the engine supplying the header and the reads data source; the data
     *               source, its read properties and the exclusion list may each be null,
     *               in which case N CIGAR reads are not allowed
     */
    @Override
    public void initialize(GenomeAnalysisEngine engine) {
        this.header = engine.getSAMFileHeader();

        final SAMDataSource readsDataSource = engine.getReadsDataSource();
        final ReadProperties readsInfo = readsDataSource == null ? null : readsDataSource.getReadsInfo();
        final ValidationExclusion exclusions = readsInfo == null ? null : readsInfo.getValidationExclusionList();

        this.allowNCigars = exclusions != null
                && exclusions.contains(ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS);
    }

    /**
     * Runs every well-formedness check against the read, stopping at the first failure.
     *
     * @param read the read to examine
     * @return {@code true} if at least one check fails (the read is to be filtered out),
     *         {@code false} if all checks pass
     * @throws UserException if a check is configured to fail hard rather than filter
     */
    @Override
    public boolean filterOut(SAMRecord read) {
        // The order matters: the chain short-circuits, and several checks throw, so
        // reordering could change which outcome (filter vs. exception) a read produces.
        final boolean wellFormed = checkInvalidAlignmentStart(read)
                && checkInvalidAlignmentEnd(read)
                && checkAlignmentDisagreesWithHeader(this.header, read)
                && checkHasReadGroup(read)
                && checkMismatchingBasesAndQuals(read, this.filterMismatchingBaseAndQuals)
                && checkCigarDisagreesWithAlignment(read)
                && checkSeqStored(read, this.filterBasesNotStored)
                && checkCigarIsSupported(read, this.filterReadsWithNCigar, this.allowNCigars);
        return !wellFormed;
    }

    /**
     * Verifies that the read resolves to a read group.
     *
     * @return {@code true} if {@code read.getReadGroup()} is non-null; never returns {@code false}
     * @throws UserException.ReadMissingReadGroup if the read carries no RG attribute
     * @throws UserException.ReadHasUndefinedReadGroup if the read carries an RG attribute
     *         but {@code getReadGroup()} still returned null
     */
    private static boolean checkHasReadGroup(SAMRecord read) {
        if (read.getReadGroup() != null) {
            return true;
        }
        final String readGroupId = (String) read.getAttribute(SAMTagUtil.getSingleton().RG);
        if (readGroupId == null) {
            throw new UserException.ReadMissingReadGroup(read);
        }
        throw new UserException.ReadHasUndefinedReadGroup(read, readGroupId);
    }

    /**
     * Checks the alignment start of a read that is not flagged as unmapped.
     *
     * @return {@code false} if the read is not flagged unmapped yet its alignment start is
     *         {@link SAMRecord#NO_ALIGNMENT_START} or -1; {@code true} otherwise
     */
    private static boolean checkInvalidAlignmentStart(SAMRecord read) {
        if (read.getReadUnmappedFlag()) {
            return true;
        }
        final int alignmentStart = read.getAlignmentStart();
        return alignmentStart != SAMRecord.NO_ALIGNMENT_START && alignmentStart != -1;
    }

    /**
     * Checks that a mapped read does not span a negative number of reference bases.
     *
     * @return {@code false} if the read is not flagged unmapped, its alignment end is not -1,
     *         and {@code end - start + 1} is negative; {@code true} otherwise
     */
    private static boolean checkInvalidAlignmentEnd(SAMRecord read) {
        if (read.getReadUnmappedFlag()) {
            return true;
        }
        final int alignmentEnd = read.getAlignmentEnd();
        if (alignmentEnd == -1) {
            return true;
        }
        return (alignmentEnd - read.getAlignmentStart()) + 1 >= 0;
    }

    /**
     * Checks the read's reference index and alignment start against the header.
     *
     * @param header the header whose sequence record is looked up by the read's reference index
     * @param read   the read to examine
     * @return {@code false} if the read has an alignment start but no reference index, or if
     *         it is not flagged unmapped and starts beyond the length of its reference
     *         sequence; {@code true} otherwise
     */
    private static boolean checkAlignmentDisagreesWithHeader(SAMFileHeader header, SAMRecord read) {
        final int referenceIndex = read.getReferenceIndex().intValue();
        if (referenceIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
                && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) {
            return false;
        }
        if (read.getReadUnmappedFlag()) {
            return true;
        }
        // The sequence record is only dereferenced for reads not flagged as unmapped.
        return read.getAlignmentStart() <= header.getSequence(referenceIndex).getSequenceLength();
    }

    /**
     * Checks the read's alignment blocks for reads that have an alignment start.
     *
     * <p>NOTE(review): the final comparison is {@code size() >= 0}; assuming
     * {@code getAlignmentBlocks()} returns a standard collection, that is always true, so this
     * method currently never returns {@code false}. The intended condition may have been an
     * emptiness test -- confirm before tightening, since that would change which reads are
     * filtered. The call is kept so that any side effects or exceptions of
     * {@code getAlignmentBlocks()} are unchanged.
     *
     * @return {@code true} in every case that returns normally (see note)
     */
    private static boolean checkCigarDisagreesWithAlignment(SAMRecord read) {
        if (read.getReadUnmappedFlag()) {
            return true;
        }
        final int alignmentStart = read.getAlignmentStart();
        if (alignmentStart == -1 || alignmentStart == SAMRecord.NO_ALIGNMENT_START) {
            return true;
        }
        return read.getAlignmentBlocks().size() >= 0;
    }

    /**
     * Decides what to do with a read whose CIGAR contains the N operator.
     *
     * @param read                  the read to examine
     * @param filterReadsWithNCigar if true, reads containing N are reported as failing
     * @param allowNCigars          if true (and filtering is off), reads containing N pass
     * @return {@code true} if the CIGAR has no N operator, or has one and only
     *         {@code allowNCigars} is set; {@code false} if it has one and
     *         {@code filterReadsWithNCigar} is set (filtering takes precedence over allowing)
     * @throws UserException.UnsupportedCigarOperatorException if the CIGAR contains N and
     *         neither flag is set
     */
    private static boolean checkCigarIsSupported(SAMRecord read, boolean filterReadsWithNCigar, boolean allowNCigars) {
        if (!containsNOperator(read)) {
            return true;
        }
        if (filterReadsWithNCigar) {
            return false;
        }
        if (allowNCigars) {
            return true;
        }
        throw new UserException.UnsupportedCigarOperatorException(CigarOperator.N, read,
                "Perhaps you are trying to use RNA-Seq data? While we are currently actively working to support this data type unfortunately the GATK cannot be used with this data in its current form. You have the option of either filtering out all reads with operator "
                        + CigarOperator.N
                        + " in their CIGAR string (please add --"
                        + FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME
                        + " to your command line) or assume the risk of processing those reads as they are including the pertinent unsafe flag (please add -U "
                        + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
                        + " to your command line). Notice however that if you were to choose the latter, an unspecified subset of the analytical outputs of an unspecified subset of the tools will become unpredictable. Consequently the GATK team might well not be able to provide you with the usual support with any issue regarding any output");
    }

    /**
     * @return {@code true} if the read has a non-null CIGAR with at least one
     *         {@link CigarOperator#N} element; {@code false} otherwise
     */
    private static boolean containsNOperator(SAMRecord read) {
        final Cigar cigar = read.getCigar();
        if (cigar == null) {
            return false;
        }
        for (final CigarElement element : cigar.getCigarElements()) {
            if (element.getOperator() == CigarOperator.N) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks that the read has exactly one base quality per base.
     *
     * @param read                          the read to examine
     * @param filterMismatchingBaseAndQuals if true, a mismatch is reported by returning
     *                                      {@code false} instead of throwing
     * @return {@code true} if the read length equals the number of base qualities;
     *         {@code false} if they differ and {@code filterMismatchingBaseAndQuals} is set
     * @throws UserException.MalformedBAM if the counts differ and
     *         {@code filterMismatchingBaseAndQuals} is not set
     */
    private static boolean checkMismatchingBasesAndQuals(SAMRecord read, boolean filterMismatchingBaseAndQuals) {
        final int baseCount = read.getReadLength();
        final int qualCount = read.getBaseQualities().length;
        if (baseCount == qualCount) {
            return true;
        }
        if (filterMismatchingBaseAndQuals) {
            return false;
        }
        // Only suggest default qualities when the read has no qualities at all.
        final String hint = qualCount == 0
                ? " You can use --defaultBaseQualities to assign a default base quality for all reads, but this can be dangerous if you don't know what you are doing."
                : "";
        throw new UserException.MalformedBAM(read, String.format(
                "BAM file has a read with mismatching number of bases and base qualities. Offender: %s [%d bases] [%d quals].%s",
                read.getReadName(), baseCount, qualCount, hint));
    }

    /**
     * Checks that the read has stored bases.
     *
     * <p>The test is an identity comparison against {@link SAMRecord#NULL_SEQUENCE}, so only
     * reads whose base array is that exact instance are treated as having no stored bases.
     *
     * @param read                 the read to examine
     * @param filterBasesNotStored if true, a read without stored bases is reported by
     *                             returning {@code false} instead of throwing
     * @return {@code true} if the read's bases are not {@code NULL_SEQUENCE}; {@code false}
     *         if they are and {@code filterBasesNotStored} is set
     * @throws UserException.MalformedBAM if the bases are {@code NULL_SEQUENCE} and
     *         {@code filterBasesNotStored} is not set
     */
    protected static boolean checkSeqStored(SAMRecord read, boolean filterBasesNotStored) {
        if (read.getReadBases() != SAMRecord.NULL_SEQUENCE) {
            return true;
        }
        if (filterBasesNotStored) {
            return false;
        }
        throw new UserException.MalformedBAM(read, String.format("the BAM file has a read with no stored bases (i.e. it uses '*') which is not supported in the GATK; see the --filter_bases_not_stored argument. Offender: %s", read.getReadName()));
    }
}
