package de.gwdg.cdstar.ext.elastic;

import com.strobel.assembler.metadata.Flags;
import java.io.InputStream;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.SAXException;

/**
 * Thin wrapper around an Apache Tika {@link AutoDetectParser} that collects the
 * character content of a document into a single string, capped at a
 * configurable number of characters, together with the Tika {@link Metadata}.
 *
 * <p>Failures never propagate out of {@link #parse(InputStream, String, String)};
 * they are reported through the returned {@link ParseResult}.
 */
public class TikaHelper {
    /**
     * Default cap (in characters) on the extracted text. This is the limit that
     * was effectively enforced before the limit became configurable.
     */
    private static final int DEFAULT_TEXT_LIMIT = 1048576;

    /** Maximum number of characters of text kept per parsed document. */
    private int textLimit;

    /**
     * SAX handler that appends all character data to an in-memory buffer and
     * aborts parsing with a {@link SizeLimitReachedException} once the buffer
     * would grow beyond its limit. Events are not forwarded to any delegate.
     */
    static class LimitedTextExtractor extends ContentHandlerDecorator {
        StringBuilder sb = new StringBuilder(1024);
        private final int maxLen;
        /** Set once character data had to be dropped because the limit was hit. */
        private boolean truncated;

        /** Signals that text extraction stopped because the size limit was reached. */
        protected static class SizeLimitReachedException extends SAXException {
            private static final long serialVersionUID = 8651398189308287854L;

            /**
             * @param j length of the text buffer at the time of truncation. The
             *          value is a character count; the message wording is kept
             *          unchanged for compatibility with existing consumers.
             */
            public SizeLimitReachedException(long j) {
                super("Input truncated at " + j + " bytes");
            }
        }

        /** Creates an extractor using the default limit of 1048576 characters. */
        LimitedTextExtractor() {
            this(DEFAULT_TEXT_LIMIT);
        }

        /**
         * @param maxLen maximum number of characters to keep; negative values
         *               are treated as zero
         */
        LimitedTextExtractor(int maxLen) {
            this.maxLen = Math.max(0, maxLen);
        }

        /** Ignorable whitespace is deliberately dropped. */
        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        }

        /**
         * Appends a line break after each closed {@code p} element so that
         * paragraphs do not run together in the collected text.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            // Skip the separator when the buffer is full so the text never exceeds the limit.
            if ("p".equals(localName) && this.sb.length() < this.maxLen) {
                this.sb.append('\n');
            }
        }

        /**
         * Appends character data to the buffer.
         *
         * @throws SizeLimitReachedException if the data does not fit completely;
         *         the part that fits is still appended before throwing
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (length <= 0) {
                return;
            }
            int room = this.maxLen - this.sb.length();
            if (length <= room) {
                this.sb.append(ch, start, length);
                return;
            }
            // Only part of this chunk fits: keep that part and stop the parse.
            int keep = Math.max(0, room);
            if (keep > 0 && Character.isHighSurrogate(ch[start + keep - 1])) {
                // Do not end the text on one half of a surrogate pair.
                keep--;
            }
            this.sb.append(ch, start, keep);
            this.truncated = true;
            throw new SizeLimitReachedException(this.sb.length());
        }

        /** @return the text collected so far */
        public String getString() {
            return this.sb.toString();
        }

        /** @return {@code true} if character data was dropped or the buffer exceeds the limit */
        public boolean isTruncated() {
            return this.truncated || this.sb.length() > this.maxLen;
        }
    }

    /**
     * Immutable outcome of a parse attempt: the (possibly partial) text, the
     * metadata object handed to the parser, and the exception that ended the
     * parse, if any.
     */
    public static class ParseResult {
        private final String text;
        private final Metadata meta;
        private final Exception error;

        ParseResult(String str, Metadata metadata, Exception exc) {
            this.text = str;
            this.meta = metadata;
            this.error = exc;
        }

        /** @return the extracted text; may be partial if {@link #hasError()} is true */
        public String getText() {
            return this.text;
        }

        /** @return the metadata object that was passed to the parser */
        public Metadata getMeta() {
            return this.meta;
        }

        /** @return the exception that ended the parse, or {@code null} if none */
        public Exception getError() {
            return this.error;
        }

        /** @return {@code true} if the parse ended with an exception (this includes truncation) */
        public boolean hasError() {
            return this.error != null;
        }

        /**
         * @return {@code true} if the parse was stopped because the text limit
         *         was reached
         */
        public boolean isTruncated() {
            // The parser may hand the handler's exception back wrapped in another
            // exception, so look through the whole cause chain, not just the top.
            for (Throwable t = this.error; t != null; t = t.getCause()) {
                if (t instanceof LimitedTextExtractor.SizeLimitReachedException) {
                    return true;
                }
            }
            return false;
        }
    }

    /** Creates a helper with the default text limit of 1048576 characters. */
    public TikaHelper() {
        // Assign directly instead of calling the overridable setter from the constructor.
        this.textLimit = DEFAULT_TEXT_LIMIT;
    }

    /** @return the maximum number of characters of text kept per document */
    public int getTextLimit() {
        return this.textLimit;
    }

    /**
     * Sets the maximum number of characters of text kept per document. The
     * value applies to subsequent {@link #parse(InputStream, String, String)}
     * calls; zero or negative values cause any non-empty text to be reported
     * as truncated.
     */
    public void setTextLimit(int i) {
        this.textLimit = i;
    }

    /**
     * Parses the given stream and collects its text and metadata.
     *
     * <p>This method does not close the stream itself and does not throw on
     * parse failure: any {@link Exception} raised while parsing is captured in
     * the result, along with whatever text had been collected up to that point.
     *
     * @param inputStream  the document content
     * @param resourceName optional resource (file) name added to the metadata
     *                     before parsing; may be {@code null}
     * @param contentType  optional value added to the metadata under
     *                     {@code Content-Type} before parsing; may be {@code null}
     * @return the trimmed text, the metadata and the error (if any)
     */
    public ParseResult parse(InputStream inputStream, String resourceName, String contentType) {
        LimitedTextExtractor extractor = new LimitedTextExtractor(this.textLimit);
        AutoDetectParser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        if (resourceName != null) {
            metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceName);
        }
        if (contentType != null) {
            metadata.add("Content-Type", contentType);
        }
        Exception failure = null;
        try {
            parser.parse(inputStream, extractor, metadata);
        } catch (Exception e) {
            // Boundary catch: report the failure together with the partial text.
            failure = e;
        }
        return new ParseResult(extractor.getString().trim(), metadata, failure);
    }
}
