package de.gwdg.cdstar.ext.elastic;

import com.fasterxml.jackson.databind.node.ObjectNode;
import com.googlecode.mp4parser.boxes.apple.TrackLoadSettingsAtom;
import de.gwdg.cdstar.Utils;
import de.gwdg.cdstar.client.CDStarRestClient;
import de.gwdg.cdstar.client.ErrorResponseException;
import de.gwdg.cdstar.client.actions.GetArchiveInfo;
import de.gwdg.cdstar.client.actions.GetFileStream;
import de.gwdg.cdstar.client.helper.ArchiveFileIterable;
import de.gwdg.cdstar.event.ChangeEvent;
import de.gwdg.cdstar.ext.elastic.BulkRequest;
import de.gwdg.cdstar.ext.elastic.ElasticSearchClient;
import de.gwdg.cdstar.ext.elastic.SearchRequest;
import de.gwdg.cdstar.ext.elastic.TikaHelper;
import de.gwdg.cdstar.ext.elastic.dao.ArchiveDocument;
import de.gwdg.cdstar.ext.elastic.dao.FileDocument;
import de.gwdg.cdstar.ext.elastic.dao.IndexDocument;
import de.gwdg.cdstar.web.common.model.ArchiveInfo;
import de.gwdg.cdstar.web.common.model.FileInfo;
import de.gwdg.cdstar.web.common.model.MetaMap;
import java.io.IOException;
import java.io.InputStream;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.commons.collections4.map.HashedMap;
import org.apache.cxf.phase.Phase;
import org.apache.uima.cas.impl.XCASSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

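/**
 * Job that brings the Elasticsearch documents of a single archive (one archive document plus one
 * document per file) in line with the archive state reported by a {@link CDStarRestClient}.
 *
 * <p>Loaded from: input_file:de/gwdg/cdstar/ext/elastic/ElasticIngestJob.class
 */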
public class ElasticIngestJob implements AutoCloseable {
    /** Elasticsearch client used for searches, bulk requests and delete-by-query requests. */
    private final ElasticSearchClient es;

    /** Private copy of the caller's REST client; released again in {@link #close()}. */
    private final CDStarRestClient cdstar;

    /** Change event that triggered this job; supplies vault, archive and (optionally) revision. */
    private final ChangeEvent event;

    /** Name of the index that is searched and written. */
    private final String index;

    /** Archive info fetched by {@link #sync()}; {@code null} until then. */
    private ArchiveInfo archive;

    /** Every file listed for the archive during {@link #sync()}. */
    private ArrayList<FileInfo> allFiles;

    /** Files keyed by file id that have not yet been matched against an indexed document. */
    private Map<String, FileInfo> filesToSync;

    /** Bulk request currently being filled, or {@code null} if none is open. */
    private BulkRequest currentBulk;

    /** {@code "<vault>:<archive>"}, used as prefix in log messages. */
    private final String taskKey;

    private static final Logger log = LoggerFactory.getLogger(ElasticIngestJob.class);

    /** Shared text extractor; its text limit is configured in the static initializer. */
    private static final TikaHelper tika = new TikaHelper();

    /** Number of queued items at which the open bulk request is sent before more are added. */
    private int maxBulk = 32;

    /** Maximum time in milliseconds to wait for a single Elasticsearch request. */
    private long msTimeout = Duration.ofSeconds(60).toMillis();

    /** ACL subjects that may read the archive document. */
    private final HashSet<String> readAccessForArchive = new HashSet<>();

    /** ACL subjects that may read the file documents. */
    private final HashSet<String> readAccessForFiles = new HashSet<>();

    /**
     * Creates a job for the archive referenced by the given change event.
     *
     * @param cDStarRestClient    client to copy; the job works on (and later closes) its own copy
     * @param elasticSearchClient client used for all index operations
     * @param str                 name of the index to synchronise
     * @param changeEvent         event naming the vault and archive to synchronise
     */
    public ElasticIngestJob(CDStarRestClient cDStarRestClient, ElasticSearchClient elasticSearchClient, String str, ChangeEvent changeEvent) {
        // NOTE(review): "m2081clone" looks like a decompiler-renamed clone() - confirm before renaming.
        cdstar = cDStarRestClient.m2081clone();
        es = elasticSearchClient;
        index = str;
        event = changeEvent;
        taskKey = event.getVault() + ":" + event.getArchive();
    }

    /**
     * Sets the number of queued items at which an open bulk request is sent.
     *
     * @param i new bulk size threshold
     */
    public void setMaxBulk(int i) {
        maxBulk = i;
    }

    /**
     * Sets how long the job waits for a single Elasticsearch request to complete.
     *
     * @param i        timeout value
     * @param timeUnit unit of {@code i}; the value is stored in milliseconds
     */
    public void setTimeout(int i, TimeUnit timeUnit) {
        final long millis = timeUnit.toMillis((long) i);
        msTimeout = millis;
    }

    /** Releases the job's private REST client copy via {@code Utils.closeQuietly}. */
    @Override // java.lang.AutoCloseable
    public void close() {
        Utils.closeQuietly(cdstar);
    }

    /**
     * Synchronises the index with the archive referenced by the change event.
     *
     * <p>Steps: fetch the archive info (with ACL and metadata), derive the read-access sets,
     * list all files, walk the documents already indexed for the archive (updating or deleting
     * file documents), index files that had no document yet, index the archive document and
     * finally send the pending bulk request. If the event carries a revision that differs from
     * the archive's current revision, nothing is done.
     *
     * <p>If the REST client answers with status 404, all documents with the archive's id are
     * removed from the index instead. Any other {@link ErrorResponseException} is logged and
     * rethrown.
     *
     * @throws InterruptedException if the current thread was interrupted between steps
     * @throws ExecutionException   if an Elasticsearch request failed
     * @throws TimeoutException     if an Elasticsearch request did not complete in time
     * @throws IOException          if reading from the REST client failed
     */
    public void sync() throws InterruptedException, CancellationException, ExecutionException, TimeoutException, IOException {
        this.cdstar.begin(true);
        checkInterrupted();
        try {
            this.archive = new GetArchiveInfo(this.event.getVault(), this.event.getArchive())
                    .withAcl(true)
                    .withMeta(true)
                    .execute(this.cdstar);
            if (!this.archive.state.isReadable()) {
                log.warn("[{}] Archive is in {} state: File content will not be indexed.", this.taskKey, this.archive.state);
            }
            checkInterrupted();

            // An event for a specific revision is outdated once the archive has moved on.
            if (this.event.getRevision() != null && !this.archive.revision.equals(this.event.getRevision())) {
                log.debug("[{}] Archive revision does not match. Skipping this task.", this.taskKey);
                return;
            }

            collectReadAccess();
            checkInterrupted();

            this.allFiles = new ArrayList<>();
            new ArchiveFileIterable(this.cdstar, this.archive.vault, this.archive.id)
                    .withMeta(true)
                    .forEach(listedFile -> this.allFiles.add(listedFile));
            this.filesToSync = new HashedMap<>(this.allFiles.size());
            for (FileInfo listedFile : this.allFiles) {
                this.filesToSync.put(listedFile.id, listedFile);
            }
            checkInterrupted();

            ArchiveDocument indexedArchive = reconcileIndexedDocuments();

            // Whatever is still in filesToSync had no document in the index yet.
            for (FileInfo unindexedFile : this.filesToSync.values()) {
                indexFile(new FileDocument(), this.archive, unindexedFile);
            }
            indexArchive(indexedArchive != null ? indexedArchive : new ArchiveDocument(), this.archive);
            sendBulk();
        } catch (ErrorResponseException e) {
            if (e.getError().getStatus() != 404) {
                log.warn("[{}] Failed to read archive info from cdstar", this.taskKey, e);
                throw e;
            }
            log.debug("[{}] Archive does not exist or is not visible. Removing from index.", this.taskKey);
            deleteByTermQuery("id", this.event.getArchive());
        }
    }

    /**
     * Rebuilds the two read-access sets from the ACL of the loaded archive.
     *
     * <p>The sets are cleared first so that a repeated {@link #sync()} on the same job cannot carry
     * over subjects from an earlier run. The subject {@code "$owner"} is replaced by the archive's
     * owner. An entry listing {@code "READ"} or {@code "OWNER"} grants archive and file access;
     * otherwise {@code "load"}, {@code "read"} and {@code "read_meta"} together grant archive
     * access, and {@code "read_files"} on top of that grants file access.
     */
    private void collectReadAccess() {
        this.readAccessForArchive.clear();
        this.readAccessForFiles.clear();
        for (Map.Entry<String, List<String>> grant : this.archive.acl.getMap().entrySet()) {
            String subject = grant.getKey();
            List<String> permissions = grant.getValue();
            if (subject.equals("$owner")) {
                subject = this.archive.owner;
            }
            if (permissions.contains("READ") || permissions.contains("OWNER")) {
                this.readAccessForArchive.add(subject);
                this.readAccessForFiles.add(subject);
            } else if (permissions.contains("load") && permissions.contains("read") && permissions.contains("read_meta")) {
                // Plain literals: the decompiled source referenced same-valued constants of
                // unrelated libraries (mp4parser, CXF) here.
                this.readAccessForArchive.add(subject);
                if (permissions.contains("read_files")) {
                    this.readAccessForFiles.add(subject);
                }
            }
        }
        if (this.readAccessForArchive.isEmpty()) {
            log.debug("[{}] Archive ACL for read access is empty. Entry will be indexed anyway.", this.taskKey);
        }
    }

    /**
     * Pages through all documents currently indexed for the archive and reconciles the file
     * documents with {@link #filesToSync}: a matching file is re-indexed and removed from the map,
     * a document without matching file is deleted (see {@code indexFile}).
     *
     * @return the archive document found in the index, or {@code null} if there was none
     */
    private ArchiveDocument reconcileIndexedDocuments() throws InterruptedException, CancellationException, ExecutionException, TimeoutException, IOException {
        SearchRequest search = this.es.search(this.index);
        search.dsl().with("bool").withArray("filter").addObject().with("term").put("id", this.archive.id);
        // Sort key for the searchAfter paging below.
        search.sort("_id");
        search.source("id", "scope", "modified", FileDocument.FID_FIELD, FileDocument.SHA256_FIELD);
        search.limit(25);

        ArchiveDocument indexedArchive = null;
        SearchRequest.ESSearchResult page = (SearchRequest.ESSearchResult) waitFor(search);
        while (page.size() > 0) {
            for (SearchRequest.ESSearchHit hit : page.hits()) {
                checkInterrupted();
                IndexDocument indexed = (IndexDocument) hit.source(IndexDocument.class);
                if (indexed instanceof ArchiveDocument) {
                    indexedArchive = (ArchiveDocument) indexed;
                } else if (indexed instanceof FileDocument) {
                    FileDocument indexedFile = (FileDocument) indexed;
                    indexFile(indexedFile, this.archive, this.filesToSync.remove(indexedFile.fid));
                }
            }
            page = (SearchRequest.ESSearchResult) waitFor(search.searchAfter(page));
        }
        return indexedArchive;
    }

    /**
     * Submits a prepared Elasticsearch request and blocks until its result is available.
     *
     * @param eSPreparedRequest request to submit
     * @param <T>               result type of the request
     * @return the request's result
     * @throws TimeoutException if no result arrived within the configured timeout
     */
    private <T> T waitFor(ElasticSearchClient.ESPreparedRequest<T> eSPreparedRequest) throws CancellationException, ExecutionException, InterruptedException, TimeoutException {
        final long timeoutMillis = msTimeout;
        return eSPreparedRequest.submit().get(timeoutMillis, TimeUnit.MILLISECONDS);
    }

    /**
     * Sends any pending bulk request, then deletes every document of the index whose field
     * {@code str} matches the term {@code str2}, waiting for the deletion to finish.
     *
     * @param str  field name of the term query
     * @param str2 term value to match
     */
    private void deleteByTermQuery(String str, String str2) throws CancellationException, ExecutionException, InterruptedException, TimeoutException {
        sendBulk();
        DeleteByQueryRequest request = es.deleteByQuery(index);
        request.dsl().with("term").put(str, str2);
        waitFor(request);
    }

    /**
     * Returns the bulk request to which the next operation should be added.
     *
     * <p>An open request that has reached {@code maxBulk} items is sent first; a new request is
     * created whenever none is open.
     *
     * @return the open bulk request, never {@code null}
     */
    private BulkRequest getBulk() throws CancellationException, ExecutionException, InterruptedException, TimeoutException {
        final BulkRequest open = currentBulk;
        final boolean full = open != null && open.size() >= maxBulk;
        if (full) {
            sendBulk();
        }
        if (currentBulk == null) {
            currentBulk = es.bulk();
        }
        return currentBulk;
    }

    /**
     * Sends the open bulk request, if it contains anything, and waits for the result.
     *
     * <p>On success the request is discarded so that the next {@code getBulk()} starts a new one.
     *
     * @throws RuntimeException if the bulk result reports errors
     */
    private void sendBulk() throws CancellationException, ExecutionException, InterruptedException, TimeoutException {
        final BulkRequest pending = currentBulk;
        if (pending != null && !pending.isEmpty()) {
            log.debug("[{}] Sending bulk request ({} items)", taskKey, pending.size());
            BulkRequest.ESBulkResult outcome = (BulkRequest.ESBulkResult) waitFor(pending);
            if (outcome.hasErrors()) {
                throw new RuntimeException("Bulk request failed: " + outcome.getResult());
            }
            currentBulk = null;
        }
    }

    /**
     * Cooperative cancellation point: tests and clears the current thread's interrupt flag.
     *
     * @throws InterruptedException if the flag was set
     */
    private void checkInterrupted() throws InterruptedException {
        if (!Thread.interrupted()) {
            return;
        }
        throw new InterruptedException();
    }

    /**
     * Fills the archive document from the archive info and queues it for indexing under the
     * archive's id.
     *
     * @param archiveDocument document to populate (a fresh one or the one found in the index)
     * @param archiveInfo     source of the archive's attributes and metadata
     */
    private void indexArchive(ArchiveDocument archiveDocument, ArchiveInfo archiveInfo) throws CancellationException, ExecutionException, InterruptedException, TimeoutException {
        archiveDocument.id = archiveInfo.id;
        archiveDocument.vault = archiveInfo.vault;
        archiveDocument.owner = archiveInfo.owner;
        archiveDocument.tags = new HashSet<>();
        archiveDocument.created = archiveInfo.created;
        archiveDocument.modified = archiveInfo.modified;
        archiveDocument.read_access = readAccessForArchive;
        // The archive's size is the sum of the sizes of all listed files.
        archiveDocument.size = allFiles.stream().mapToLong(file -> file.size).sum();
        archiveDocument.revision = archiveInfo.revision;
        archiveDocument.profile = archiveInfo.profile;
        copyMetaAttributes(archiveDocument, archiveInfo.meta);

        log.debug("[{}] Indexing archive", taskKey);
        getBulk().index(index, archiveInfo.id, archiveDocument);
    }

    /**
     * Queues the index operation for one file document.
     *
     * <ul>
     *   <li>{@code fileInfo == null}: the document has no file any more and is deleted.</li>
     *   <li>Otherwise the document is refreshed from the file info. Text is extracted only when
     *       the file may contain text, the document is new or its SHA-256 changed, and the
     *       archive is readable. A changed file in an unreadable archive is tagged
     *       {@code "stale"}; a non-text file is tagged {@code "not_analysed"}.</li>
     *   <li>A document that is new or has content set is indexed as a whole; an existing
     *       document without content set is updated through a script that copies the
     *       document's fields into the stored source.</li>
     * </ul>
     *
     * @param fileDocument document to populate (fresh, or as loaded from the index)
     * @param archiveInfo  archive the file belongs to
     * @param fileInfo     current file state, or {@code null} if the file no longer exists
     */
    private void indexFile(FileDocument fileDocument, ArchiveInfo archiveInfo, FileInfo fileInfo) throws IOException, CancellationException, ExecutionException, InterruptedException, TimeoutException {
        if (fileInfo == null) {
            log.debug("[{}] Removing file from index: {}", taskKey, fileDocument.fid);
            getBulk().delete(index, fileDocument.getIndexID());
            return;
        }

        // These flags compare against the document as loaded, so they must be computed before
        // the fields are overwritten below.
        final boolean isNewDocument = fileDocument.id == null;
        final boolean contentOutdated = isNewDocument || !fileInfo.digests.sha256.equals(fileDocument.sha256);
        final boolean archiveReadable = archiveInfo.state.isReadable();

        fileDocument.id = archiveInfo.id;
        fileDocument.vault = archiveInfo.vault;
        fileDocument.owner = archiveInfo.owner;
        fileDocument.tags = new HashSet<>();
        fileDocument.created = fileInfo.created;
        fileDocument.modified = fileInfo.modified;
        fileDocument.read_access = readAccessForFiles;
        fileDocument.size = fileInfo.size;
        copyMetaAttributes(fileDocument, fileInfo.meta);
        fileDocument.fid = fileInfo.id;
        fileDocument.type = fileInfo.type;
        fileDocument.sha256 = fileInfo.digests.sha256;
        fileDocument.name = fileInfo.name;

        // The filename is the part of the name after the last '/'.
        final int lastSlash = fileDocument.name.lastIndexOf('/');
        fileDocument.filename = lastSlash < 0 ? fileDocument.name : fileDocument.name.substring(lastSlash + 1);

        log.debug("[{}] Indexing file: {} ({})", taskKey, fileDocument.fid, fileDocument.type);

        if (fileInfo.size == 0) {
            fileDocument.content = "";
        } else if (!mayContainText(fileInfo)) {
            fileDocument.content = "";
            fileDocument.tags.add("not_analysed");
        } else if (contentOutdated && archiveReadable) {
            TikaHelper.ParseResult parsed = extractText(fileInfo);
            if (parsed.isTruncated()) {
                log.debug("[{}] Text truncated to {} bytes", taskKey, parsed.getText().length());
            } else if (parsed.hasError()) {
                log.warn("[{}] Text extraction failed", taskKey, parsed.getError());
            }
            fileDocument.content = parsed.getText();
        } else if (contentOutdated) {
            fileDocument.tags.add("stale");
        }

        if (fileDocument.content == null && !isNewDocument) {
            // Existing document without content set: update it via script instead of replacing it.
            ObjectNode updateRequest = es.json();
            ObjectNode script = updateRequest.with("script");
            script.put("lang", "painless");
            script.put("source", "for(key in params.keySet()) {if(key != \"ctx\") ctx._source[key] = params[key];}");
            script.putPOJO("params", fileDocument);
            getBulk().update(index, fileDocument.getIndexID(), updateRequest);
            return;
        }
        getBulk().index(index, fileDocument.getIndexID(), fileDocument);
    }

    /**
     * Decides from size and media type whether text extraction is worth attempting.
     *
     * @param fileInfo file to inspect; a missing type is treated as {@code application/octet-stream}
     * @return {@code false} for empty files and for {@code image/}, {@code video/} and
     *         {@code audio/} types, {@code true} for everything else
     */
    private boolean mayContainText(FileInfo fileInfo) {
        if (fileInfo.size == 0) {
            return false;
        }
        final String mediaType = fileInfo.type == null ? "application/octet-stream" : fileInfo.type;
        if (mediaType.startsWith("text/")) {
            return true;
        }
        for (String binaryPrefix : new String[] {"image/", "video/", "audio/"}) {
            if (mediaType.startsWith(binaryPrefix)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Streams the beginning of a file from the archive and runs text extraction on it.
     *
     * <p>Only the byte range from 0 to the extractor's text limit is requested. The stream is
     * always closed, also when parsing fails.
     *
     * @param fileInfo file whose content should be parsed
     * @return the parse result produced by the shared {@code TikaHelper}
     * @throws IOException if opening, reading or closing the stream fails
     */
    private TikaHelper.ParseResult extractText(FileInfo fileInfo) throws IOException {
        GetFileStream request = new GetFileStream(this.archive.vault, this.archive.id, fileInfo.name);
        try (InputStream content = (InputStream) this.cdstar.execute(request.range(0L, tika.getTextLimit()))) {
            return tika.parse(content, fileInfo.name, fileInfo.type);
        }
    }

    /**
     * Replaces the document's metadata with a fresh map holding every entry of the given meta map.
     *
     * @param indexDocument document whose {@code meta} field is replaced
     * @param metaMap       metadata to copy
     */
    private void copyMetaAttributes(IndexDocument indexDocument, MetaMap metaMap) {
        indexDocument.meta = new HashMap<>();
        metaMap.getMap().forEach((attribute, values) -> indexDocument.meta.put(attribute, values));
    }

    // Configure the shared extractor's text limit once: 8 MiB (8388608).
    static {
        tika.setTextLimit(8 * 1024 * 1024);
    }
}
