package org.apache.lucene.analysis.kuromoji;

import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
import org.apache.lucene.analysis.kuromoji.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.kuromoji.dict.UnknownDictionary;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
import org.apache.lucene.analysis.kuromoji.viterbi.GraphvizFormatter;
import org.apache.lucene.analysis.kuromoji.viterbi.Viterbi;
import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode;

/* loaded from: input_file:WEB-INF/lib/lucene-analyzers-kuromoji-4.0.1.jar:org/apache/lucene/analysis/kuromoji/Segmenter.class */
/**
 * Turns text into a list of {@link Token}s by building a lattice with {@link Viterbi},
 * searching it for a path, and wrapping each resulting {@link ViterbiNode} in a token.
 *
 * <p>Optionally the input is first cut after every ideographic full stop ("。") or
 * ideographic comma ("、") and each piece is tokenized separately.
 */
public class Segmenter {
    /** Mode used by the constructors that do not take an explicit {@link Mode}. */
    public static final Mode DEFAULT_MODE = Mode.SEARCH;

    private final Viterbi viterbi;

    /** Dictionary handed to each token, selected by the type of the node it was built from. */
    private final EnumMap<ViterbiNode.Type, Dictionary> dictionaryMap;

    /** Whether {@link #tokenize(String)} cuts the input at "。" / "、" before tokenizing. */
    private final boolean split;

    /**
     * Segmentation mode. The value is passed straight through to {@link Viterbi}, which
     * defines what each mode means.
     */
    public enum Mode {
        NORMAL,
        SEARCH,
        EXTENDED
    }

    /** Creates a segmenter with no user dictionary, {@link #DEFAULT_MODE} and no splitting. */
    public Segmenter() {
        this(null, DEFAULT_MODE, false);
    }

    /**
     * Creates a segmenter with no user dictionary and no splitting.
     *
     * @param mode segmentation mode passed to {@link Viterbi}
     */
    public Segmenter(Mode mode) {
        this(null, mode, false);
    }

    /**
     * Creates a segmenter using {@link #DEFAULT_MODE} and no splitting.
     *
     * @param userDictionary user dictionary, or {@code null} for none
     */
    public Segmenter(UserDictionary userDictionary) {
        this(userDictionary, DEFAULT_MODE, false);
    }

    /**
     * Creates a segmenter with no splitting.
     *
     * @param userDictionary user dictionary, or {@code null} for none
     * @param mode segmentation mode passed to {@link Viterbi}
     */
    public Segmenter(UserDictionary userDictionary, Mode mode) {
        this(userDictionary, mode, false);
    }

    /**
     * Creates a fully configured segmenter.
     *
     * @param userDictionary user dictionary, or {@code null} for none; it is registered for
     *     {@link ViterbiNode.Type#USER} nodes as given, so that entry may be {@code null}
     * @param mode segmentation mode passed to {@link Viterbi}
     * @param z when {@code true}, {@link #tokenize(String)} cuts the input after each
     *     "。" or "、" and tokenizes the pieces independently
     */
    public Segmenter(UserDictionary userDictionary, Mode mode, boolean z) {
        TokenInfoDictionary tokenInfoDictionary = TokenInfoDictionary.getInstance();
        UnknownDictionary unknownDictionary = UnknownDictionary.getInstance();
        this.viterbi = new Viterbi(tokenInfoDictionary, unknownDictionary, ConnectionCosts.getInstance(), userDictionary, mode);
        this.split = z;

        this.dictionaryMap = new EnumMap<>(ViterbiNode.Type.class);
        this.dictionaryMap.put(ViterbiNode.Type.KNOWN, tokenInfoDictionary);
        this.dictionaryMap.put(ViterbiNode.Type.UNKNOWN, unknownDictionary);
        // EnumMap accepts null values, so a missing user dictionary is stored as null.
        this.dictionaryMap.put(ViterbiNode.Type.USER, userDictionary);
    }

    /**
     * Tokenizes the given text.
     *
     * <p>When splitting is disabled, or the text contains neither "。" nor "、", the whole
     * text is tokenized in one pass. Otherwise each piece (ending with, and including, its
     * delimiter) is tokenized on its own, followed by any trailing text after the last
     * delimiter. Token positions are always relative to the start of {@code str}.
     *
     * @param str text to tokenize
     * @return the tokens, in input order
     * @throws RuntimeException wrapping an {@link IOException} raised while building or
     *     searching the lattice
     */
    public List<Token> tokenize(String str) {
        if (!this.split) {
            return doTokenize(0, str);
        }
        List<Integer> splitPositions = getSplitPositions(str);
        if (splitPositions.isEmpty()) {
            return doTokenize(0, str);
        }

        List<Token> tokens = new ArrayList<>();
        int segmentStart = 0;
        for (int position : splitPositions) {
            int segmentEnd = position + 1; // exclusive; keeps the delimiter in its segment
            tokens.addAll(doTokenize(segmentStart, str.substring(segmentStart, segmentEnd)));
            segmentStart = segmentEnd;
        }
        if (segmentStart < str.length()) {
            tokens.addAll(doTokenize(segmentStart, str.substring(segmentStart)));
        }
        return tokens;
    }

    /**
     * Finds every "。" and "、" in the text.
     *
     * @param text text to scan
     * @return the indices of all delimiter characters, in ascending order
     */
    private List<Integer> getSplitPositions(String text) {
        List<Integer> positions = new ArrayList<>();
        // Single pass: each character is inspected once instead of re-running two
        // indexOf scans after every hit.
        for (int index = 0; index < text.length(); index++) {
            char c = text.charAt(index);
            if (c == '。' || c == '、') {
                positions.add(index);
            }
        }
        return positions;
    }

    /**
     * Tokenizes a whole string without discarding punctuation.
     *
     * @param offset value added to every token's position
     * @param text text to tokenize
     * @return the tokens, in input order
     */
    private List<Token> doTokenize(int offset, String text) {
        char[] chars = text.toCharArray();
        return doTokenize(offset, chars, 0, chars.length, false);
    }

    /**
     * Tokenizes a region of a character array.
     *
     * @param i value added to each node's start index to form the token position
     * @param cArr characters to tokenize
     * @param i2 second argument to {@code Viterbi.build}; presumably the start offset within
     *     {@code cArr} (callers in this class pass 0)
     * @param i3 third argument to {@code Viterbi.build}; presumably the number of characters
     *     to process (callers in this class pass the array length)
     * @param z when {@code true}, nodes of positive length whose first character satisfies
     *     {@link #isPunctuation(char)} are left out of the result
     * @return the tokens, in the order the lattice search returned their nodes
     * @throws RuntimeException wrapping an {@link IOException} raised while building or
     *     searching the lattice
     */
    public List<Token> doTokenize(int i, char[] cArr, int i2, int i3, boolean z) {
        List<Token> tokens = new ArrayList<>();
        try {
            for (ViterbiNode node : this.viterbi.search(this.viterbi.build(cArr, i2, i3))) {
                int wordId = node.getWordId();
                // KNOWN nodes with word id -1 never become tokens (presumably the lattice's
                // begin/end sentinel nodes -- confirm against Viterbi).
                if (node.getType() == ViterbiNode.Type.KNOWN && wordId == -1) {
                    continue;
                }
                if (z && node.getLength() > 0 && isPunctuation(node.getSurfaceForm()[node.getOffset()])) {
                    continue;
                }
                tokens.add(new Token(wordId, node.getSurfaceForm(), node.getOffset(), node.getLength(), node.getType(), i + node.getStartIndex(), this.dictionaryMap.get(node.getType())));
            }
            return tokens;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the lattice for the given text and renders it, together with the path chosen
     * by the search, through {@link GraphvizFormatter}.
     *
     * @param str text to analyse
     * @return the formatter's output
     * @throws RuntimeException wrapping an {@link IOException} raised while building or
     *     searching the lattice
     */
    public String debugTokenize(String str) {
        try {
            ViterbiNode[][][] lattice = this.viterbi.build(str.toCharArray(), 0, str.length());
            List<ViterbiNode> bestPath = this.viterbi.search(lattice);
            return new GraphvizFormatter(ConnectionCosts.getInstance()).format(lattice[0], lattice[1], bestPath);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether a character's Unicode general category is one of the separator,
     * control, format, punctuation or symbol categories.
     *
     * @param c character to classify
     * @return {@code true} for the categories listed in the switch, {@code false} otherwise
     */
    static final boolean isPunctuation(char c) {
        switch (Character.getType(c)) {
            case Character.SPACE_SEPARATOR:
            case Character.LINE_SEPARATOR:
            case Character.PARAGRAPH_SEPARATOR:
            case Character.CONTROL:
            case Character.FORMAT:
            case Character.DASH_PUNCTUATION:
            case Character.START_PUNCTUATION:
            case Character.END_PUNCTUATION:
            case Character.CONNECTOR_PUNCTUATION:
            case Character.OTHER_PUNCTUATION:
            case Character.MATH_SYMBOL:
            case Character.CURRENCY_SYMBOL:
            case Character.MODIFIER_SYMBOL:
            case Character.OTHER_SYMBOL:
            case Character.INITIAL_QUOTE_PUNCTUATION:
            case Character.FINAL_QUOTE_PUNCTUATION:
                return true;
            default:
                return false;
        }
    }
}
