/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.transform.tokenize;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.common.Types;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.DocumentRepresentation;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplier;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierCount;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierHash;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierPosition;
import org.apache.sysds.runtime.transform.tokenize.builder.TokenizerBuilder;
import org.apache.sysds.runtime.util.DependencyTask;
import org.apache.sysds.runtime.util.DependencyThreadPool;

/**
 * Drives tokenization of a {@link FrameBlock}: a {@link TokenizerBuilder} fills an internal
 * per-document representation, and a {@link TokenizerApplier} writes that representation
 * into the output frame.
 *
 * <p>Work can run on the calling thread or on a {@link DependencyThreadPool}. Failures of
 * the pooled execution are reported as {@link DMLRuntimeException} (with the original cause
 * attached) instead of being logged and ignored, so callers never receive a silently
 * incomplete result.
 */
public class Tokenizer
implements Serializable {
    private static final long serialVersionUID = 7155673772374114577L;
    protected static final Log LOG = LogFactory.getLog(Tokenizer.class.getName());
    /**
     * When {@code false}, multi-threaded {@link #tokenize(FrameBlock, int)} runs one combined
     * task graph; when {@code true} it falls back to the separate {@link #build} and
     * {@link #apply} stages (each of which parallelizes on its own).
     */
    private static final boolean MULTI_THREADED_STAGES_TOKENIZER = false;
    public static final int TOKENIZE_NUM_BLOCKS = ConfigurationManager.getNumberTokenizeBlocks();
    private DocumentRepresentation[] internalRepresentation = null;
    private final TokenizerBuilder tokenizerBuilder;
    private final TokenizerApplier tokenizerApplier;

    /**
     * Creates a tokenizer from its two collaborating stages.
     *
     * @param tokenizerBuilder stage that creates the internal document representation
     * @param tokenizerApplier stage that writes the representation to the output frame
     */
    protected Tokenizer(TokenizerBuilder tokenizerBuilder, TokenizerApplier tokenizerApplier) {
        this.tokenizerBuilder = tokenizerBuilder;
        this.tokenizerApplier = tokenizerApplier;
    }

    /**
     * @return the output schema as reported by the applier
     */
    public Types.ValueType[] getSchema() {
        return this.tokenizerApplier.getOutSchema();
    }

    /**
     * @param inRows number of input rows
     * @return the applier's upper bound of output rows for {@code inRows} input rows
     */
    public int getMaxNumRows(int inRows) {
        return this.tokenizerApplier.getMaxNumRows(inRows);
    }

    /**
     * Computes the number of output rows from the internal representation.
     *
     * <ul>
     * <li>wide format: one row per document,</li>
     * <li>padding enabled: {@code maxTokens} rows per document,</li>
     * <li>otherwise: the per-document token count capped at {@code maxTokens}, summed.</li>
     * </ul>
     *
     * @return the number of rows derived from the internal representation
     * @throws DMLRuntimeException if the internal representation has not been allocated yet
     */
    public int getNumRowsEstimate() {
        if (this.internalRepresentation == null) {
            throw new DMLRuntimeException("Internal Token Representation was not computed yet. Can not get exact size.");
        }
        if (this.tokenizerApplier.isWideFormat()) {
            return this.internalRepresentation.length;
        }
        if (this.tokenizerApplier.hasPadding()) {
            return this.internalRepresentation.length * this.tokenizerApplier.getMaxTokens();
        }
        return Arrays.stream(this.internalRepresentation)
            .mapToInt(doc -> Math.min(doc.tokens.size(), this.tokenizerApplier.getMaxTokens()))
            .sum();
    }

    /**
     * @return the number of output columns as reported by the applier
     */
    public long getNumCols() {
        return this.tokenizerApplier.getNumCols();
    }

    /**
     * Allocates the per-document representation array and the applier's internal metadata.
     *
     * @param numDocuments number of documents (input rows) to reserve space for
     */
    public void allocateInternalRepresentation(int numDocuments) {
        this.internalRepresentation = new DocumentRepresentation[numDocuments];
        this.tokenizerApplier.allocateInternalMeta(numDocuments);
    }

    /**
     * Tokenizes {@code in} on the calling thread.
     *
     * @param in input frame
     * @return the tokenized output frame
     */
    public FrameBlock tokenize(FrameBlock in) {
        return this.tokenize(in, 1);
    }

    /**
     * Tokenizes {@code in} using up to {@code k} threads.
     *
     * @param in input frame
     * @param k  degree of parallelism; values {@code <= 1} run on the calling thread
     * @return the tokenized output frame, sliced to the rows that were actually written
     * @throws DMLRuntimeException if the multi-threaded execution fails or is interrupted
     */
    public FrameBlock tokenize(FrameBlock in, int k) {
        this.allocateInternalRepresentation(in.getNumRows());
        FrameBlock out = new FrameBlock(this.getSchema());
        if (k > 1 && !MULTI_THREADED_STAGES_TOKENIZER) {
            DependencyThreadPool pool = new DependencyThreadPool(k);
            LOG.debug("Tokenizing with full DAG on " + k + " Threads");
            try {
                List<DependencyTask<?>> tokenizeTasks = this.getTokenizeTasks(in, out, pool);
                // Tasks that return null (e.g. the output allocation task) count as row 0.
                int lastRow = pool.submitAllAndWait(tokenizeTasks).stream()
                    .mapToInt(result -> result == null ? 0 : (Integer) result)
                    .max()
                    .getAsInt();
                if (lastRow != out.getNumRows()) {
                    out = out.slice(0, lastRow - 1, 0, out.getNumColumns() - 1, null);
                }
            }
            catch (InterruptedException | ExecutionException e) {
                throw executionFailure("MT tokenize failed", e);
            }
            finally {
                // Always release the worker threads, also when task construction throws.
                pool.shutdown();
            }
        } else {
            this.build(in, k);
            out.ensureAllocatedColumns(this.tokenizerApplier.getNumRows(this.internalRepresentation));
            out = this.apply(out, k);
        }
        return out;
    }

    /**
     * Assembles the combined task list for a multi-threaded tokenize run, in this order:
     * one output allocation task, the build tasks from {@link #getBuildTasks(FrameBlock)}
     * (builder tasks followed by applier-build tasks), then the applier's apply tasks.
     *
     * <p>NOTE(review): the index ranges placed in {@code depMap} are interpreted by
     * {@code DependencyThreadPool.createDependencyList}; presumably the key is the range of
     * dependent tasks and the value the range of tasks they wait for - confirm there.
     *
     * @param in   input frame
     * @param out  output frame the apply tasks write to
     * @param pool thread pool of the caller (not used while assembling the tasks)
     * @return dependency tasks ready to be submitted
     * @throws DMLRuntimeException if build and apply block counts differ or the applier type is unknown
     */
    private List<DependencyTask<?>> getTokenizeTasks(FrameBlock in, FrameBlock out, DependencyThreadPool pool) {
        List<DependencyTask<?>> tasks = new ArrayList<>();
        HashMap<Integer[], Integer[]> depMap = new HashMap<>();
        tasks.add(DependencyThreadPool.createDependencyTask(new AllocateOutputFrame(this, out)));
        List<DependencyTask<?>> buildTasks = this.getBuildTasks(in);
        tasks.addAll(buildTasks);
        List<DependencyTask<?>> applyTasks = this.tokenizerApplier.getApplyTasks(this.internalRepresentation, out);
        // buildTasks holds builder tasks plus an equal number of applier-build tasks.
        if (applyTasks.size() != buildTasks.size() / 2) {
            throw new DMLRuntimeException("Different block sizes between build and apply tasks currently not supported");
        }
        if (!(this.tokenizerApplier.isWideFormat() && this.tokenizerApplier.hasPadding())) {
            int buildTaskOffset;
            if (this.tokenizerApplier instanceof TokenizerApplierPosition) {
                // Pair apply task i with the i-th task of the first half of the build tasks.
                buildTaskOffset = 0;
            } else if (this.tokenizerApplier instanceof TokenizerApplierCount || this.tokenizerApplier instanceof TokenizerApplierHash) {
                // Pair apply task i with the i-th task of the second half of the build tasks.
                buildTaskOffset = applyTasks.size();
            } else {
                throw new DMLRuntimeException("Unknown TokenizerApplier");
            }
            // Allocation task (index 0) <-> first half of the build tasks.
            depMap.put(new Integer[]{0, 1}, new Integer[]{1, buildTasks.size() / 2 + 1});
            // All apply tasks <-> allocation task.
            depMap.put(new Integer[]{tasks.size(), tasks.size() + applyTasks.size()}, new Integer[]{0, 1});
            for (int i = 0; i < applyTasks.size(); ++i) {
                depMap.put(new Integer[]{tasks.size() + i, tasks.size() + applyTasks.size()}, new Integer[]{1 + buildTaskOffset + i, 2 + buildTaskOffset + i});
            }
        }
        tasks.addAll(applyTasks);
        List<List<? extends Callable<?>>> deps = new ArrayList<>(Collections.nCopies(tasks.size(), null));
        DependencyThreadPool.createDependencyList(tasks, depMap, deps);
        return DependencyThreadPool.createDependencyTasks(tasks, deps);
    }

    /**
     * Writes the internal representation into {@code out}.
     *
     * @param out output frame to fill
     * @param k   degree of parallelism; values {@code <= 1} run on the calling thread
     * @return {@code out}, or a slice of it when fewer rows were written than allocated
     * @throws DMLRuntimeException if the multi-threaded execution fails or is interrupted
     */
    public FrameBlock apply(FrameBlock out, int k) {
        int lastRow;
        if (k > 1) {
            DependencyThreadPool pool = new DependencyThreadPool(k);
            try {
                List<DependencyTask<?>> taskList = this.tokenizerApplier.getApplyTasks(this.internalRepresentation, out);
                lastRow = pool.submitAllAndWait(taskList).stream()
                    .mapToInt(result -> (Integer) result)
                    .max()
                    .getAsInt();
            }
            catch (InterruptedException | ExecutionException e) {
                throw executionFailure("MT Tokenizer apply failed", e);
            }
            finally {
                pool.shutdown();
            }
        } else {
            lastRow = this.tokenizerApplier.applyInternalRepresentation(this.internalRepresentation, out);
        }
        if (lastRow != out.getNumRows()) {
            out = out.slice(0, lastRow - 1, 0, out.getNumColumns() - 1, null);
        }
        return out;
    }

    /**
     * Creates the build tasks: the builder's tasks followed by the same number of
     * applier-build tasks, with applier-build task {@code i} linked to builder task {@code i}.
     *
     * @param in input frame
     * @return dependency tasks; first half builder tasks, second half applier-build tasks
     * @throws DMLRuntimeException if builder and applier produce a different number of tasks
     */
    public List<DependencyTask<?>> getBuildTasks(FrameBlock in) {
        List<DependencyTask<?>> tasks = this.tokenizerBuilder.getTasks(in, this.internalRepresentation);
        List<DependencyTask<?>> applierBuildTaskList = this.tokenizerApplier.getBuildTasks(this.internalRepresentation);
        if (tasks.size() != applierBuildTaskList.size()) {
            throw new DMLRuntimeException("Cannot create dependencies for mismatched array sizes");
        }
        tasks.addAll(applierBuildTaskList);
        List<List<? extends Callable<?>>> deps = new ArrayList<>(Collections.nCopies(tasks.size(), null));
        HashMap<Integer[], Integer[]> depMap = new HashMap<>();
        int numBlocks = applierBuildTaskList.size();
        for (int i = 0; i < tasks.size() / 2; ++i) {
            depMap.put(new Integer[]{i + numBlocks, i + numBlocks + 1}, new Integer[]{i, i + 1});
        }
        DependencyThreadPool.createDependencyList(tasks, depMap, deps);
        return DependencyThreadPool.createDependencyTasks(tasks, deps);
    }

    /**
     * Builds the internal representation of {@code in} and the applier's metadata.
     *
     * @param in input frame
     * @param k  degree of parallelism; values {@code <= 1} run on the calling thread
     * @throws DMLRuntimeException if the multi-threaded execution fails or is interrupted
     */
    public void build(FrameBlock in, int k) {
        this.tokenizerApplier.allocateInternalMeta(in.getNumRows());
        if (k > 1) {
            DependencyThreadPool pool = new DependencyThreadPool(k);
            try {
                pool.submitAllAndWait(this.getBuildTasks(in));
            }
            catch (InterruptedException | ExecutionException e) {
                throw executionFailure("MT Tokenizer build failed", e);
            }
            finally {
                pool.shutdown();
            }
        } else {
            this.tokenizerBuilder.createInternalRepresentation(in, this.internalRepresentation);
            this.tokenizerApplier.build(this.internalRepresentation, 0, -1);
        }
    }

    /**
     * Wraps a failure of pooled execution, restoring the interrupt flag when the waiting
     * thread was interrupted so that callers further up can still observe it.
     *
     * @param message description of the failed stage
     * @param cause   the exception raised while waiting for the tasks
     * @return the exception to throw
     */
    private static DMLRuntimeException executionFailure(String message, Exception cause) {
        if (cause instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        return new DMLRuntimeException(message, cause);
    }

    /**
     * Task that allocates the output frame's columns using
     * {@link Tokenizer#getNumRowsEstimate()}. It yields {@code null}, which
     * {@link Tokenizer#tokenize(FrameBlock, int)} treats as row index 0.
     */
    protected static class AllocateOutputFrame
    implements Callable<Object> {
        protected final Tokenizer _tokenizer;
        protected final FrameBlock _out;

        /**
         * @param tokenizer tokenizer that supplies the row estimate
         * @param out       output frame to allocate
         */
        protected AllocateOutputFrame(Tokenizer tokenizer, FrameBlock out) {
            this._tokenizer = tokenizer;
            this._out = out;
        }

        @Override
        public Object call() throws Exception {
            this._out.ensureAllocatedColumns(this._tokenizer.getNumRowsEstimate());
            return null;
        }
    }
}

