/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.tokenizer.lexical;

import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.bintrie.BaseNode;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.model.perceptron.tagset.NERTagSet;
import com.hankcs.hanlp.recognition.nr.JapanesePersonRecognition;
import com.hankcs.hanlp.recognition.nr.TranslatedPersonRecognition;
import com.hankcs.hanlp.seg.CharacterBasedSegment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.seg.common.Vertex;
import com.hankcs.hanlp.seg.common.WordNet;
import com.hankcs.hanlp.tokenizer.lexical.LexicalAnalyzer;
import com.hankcs.hanlp.tokenizer.lexical.NERecognizer;
import com.hankcs.hanlp.tokenizer.lexical.POSTagger;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
 * Base lexical analyzer that chains a {@link Segmenter}, an optional {@link POSTagger} and an
 * optional {@link NERecognizer}, and optionally merges words through {@link CustomDictionary}.
 *
 * <p>Two entry points produce results: {@link #analyze(String)} returns a {@link Sentence} of
 * {@link IWord}s, while {@link #segSentence(char[])} returns a list of {@link Term}s.
 */
public class AbstractLexicalAnalyzer
extends CharacterBasedSegment
implements LexicalAnalyzer {
    /**
     * Character type whose runs are handed to the statistical segmenter.
     * NOTE(review): presumably equal to CharType.CT_CHINESE - confirm against CharType.
     */
    private static final byte TYPE_CHINESE = 7;
    /**
     * Character type treated as a digit by the rule-based pre-segmentation.
     * NOTE(review): presumably equal to CharType.CT_NUM - confirm against CharType.
     */
    private static final byte TYPE_NUMBER = 9;
    /** Characters that may sit between two digits without ending the number (commas and full stops). */
    private static final String DECIMAL_SEPARATORS = "\uff0c,\uff0e.";
    /** Characters that, when following a number, keep the number attached to what follows. */
    private static final String DATE_TIME_UNITS = "\u5e74\u6708\u65e5\u65f6\u5206\u79d2";

    /** Segmenter every piece of text is eventually delegated to. */
    protected Segmenter segmenter;
    /** Part-of-speech tagger; may be {@code null}, in which case no model tagging happens. */
    protected POSTagger posTagger;
    /** Named entity recognizer; may be {@code null}, in which case no entity merging happens. */
    protected NERecognizer neRecognizer;
    /** Per-character type table: a copy of {@code CharType.type} patched by the static initializer. */
    protected static byte[] typeTable = new byte[CharType.type.length];
    /** Whether {@link #segmentAfterRule} splits the text by character type before segmenting. */
    protected boolean enableRuleBasedSegment = false;

    /**
     * Creates an analyzer without any component and turns off translated and Japanese name
     * recognition in the inherited configuration.
     */
    protected AbstractLexicalAnalyzer() {
        this.config.translatedNameRecognize = false;
        this.config.japaneseNameRecognize = false;
    }

    /**
     * Creates an analyzer that only segments.
     *
     * @param segmenter the segmenter to delegate to
     */
    public AbstractLexicalAnalyzer(Segmenter segmenter) {
        this();
        this.segmenter = segmenter;
    }

    /**
     * Creates an analyzer that segments and tags. Unlike the three-argument constructor, this one
     * does not modify {@code config.speechTagging}.
     *
     * @param segmenter the segmenter to delegate to
     * @param posTagger the part-of-speech tagger
     */
    public AbstractLexicalAnalyzer(Segmenter segmenter, POSTagger posTagger) {
        this();
        this.segmenter = segmenter;
        this.posTagger = posTagger;
    }

    /**
     * Creates an analyzer with all three components. Speech tagging is switched on when a tagger
     * is given, and NER is switched on when both a tagger and a recognizer are given.
     *
     * @param segmenter    the segmenter to delegate to
     * @param posTagger    the part-of-speech tagger, may be {@code null}
     * @param neRecognizer the named entity recognizer, may be {@code null}
     */
    public AbstractLexicalAnalyzer(Segmenter segmenter, POSTagger posTagger, NERecognizer neRecognizer) {
        this();
        this.segmenter = segmenter;
        this.posTagger = posTagger;
        this.neRecognizer = neRecognizer;
        if (posTagger != null) {
            this.config.speechTagging = true;
            if (neRecognizer != null) {
                this.config.ner = true;
            }
        }
    }

    /**
     * Segments {@code sentence}, keeping custom dictionary hits as whole words and recording their
     * attributes.
     *
     * @param sentence      the original text
     * @param normalized    the normalized text, aligned character by character with {@code sentence}
     * @param wordList      receives the words
     * @param attributeList receives the attribute of each dictionary word and {@code null} for the
     *                      words before it that came from the segmenter; it is not padded after the
     *                      last dictionary hit, so it may be shorter than {@code wordList}. When
     *                      {@code null}, the custom dictionary is not consulted at all.
     */
    protected void segment(final String sentence, final String normalized, final List<String> wordList, final List<CoreDictionary.Attribute> attributeList) {
        if (attributeList == null) {
            this.segmentAfterRule(sentence, normalized, wordList);
            return;
        }
        this.segmentAroundCustomWords(sentence, normalized, wordList, attributeList);
    }

    /**
     * Segments {@code sentence} into {@code wordList}. When {@code config.useCustomDictionary} is
     * set, custom dictionary hits are kept as whole words and only the text between them is
     * segmented.
     *
     * @param sentence   the original text
     * @param normalized the normalized text, aligned character by character with {@code sentence}
     * @param wordList   receives the words
     */
    @Override
    public void segment(final String sentence, final String normalized, final List<String> wordList) {
        if (this.config.useCustomDictionary) {
            this.segmentAroundCustomWords(sentence, normalized, wordList, null);
        } else {
            this.segmentAfterRule(sentence, normalized, wordList);
        }
    }

    /**
     * Shared implementation of both custom-dictionary-aware {@code segment} overloads: every hit
     * reported by {@code CustomDictionary.parseLongestText} becomes one word, and the gaps between
     * hits (plus the tail) go through {@link #segmentAfterRule}.
     *
     * @param attributeList when non-null, kept aligned with {@code wordList} up to the last hit
     */
    private void segmentAroundCustomWords(final String sentence, final String normalized, final List<String> wordList, final List<CoreDictionary.Attribute> attributeList) {
        // One-element array so the anonymous callback can advance the cursor.
        final int[] offset = new int[]{0};
        CustomDictionary.parseLongestText(sentence, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>(){

            @Override
            public void hit(int begin, int end, CoreDictionary.Attribute value) {
                if (begin != offset[0]) {
                    // Text between the previous hit and this one is segmented normally.
                    AbstractLexicalAnalyzer.this.segmentAfterRule(sentence.substring(offset[0], begin), normalized.substring(offset[0], begin), wordList);
                }
                if (attributeList != null) {
                    // Words produced by the segmenter carry no dictionary attribute.
                    while (attributeList.size() < wordList.size()) {
                        attributeList.add(null);
                    }
                }
                wordList.add(sentence.substring(begin, end));
                if (attributeList != null) {
                    attributeList.add(value);
                    assert (wordList.size() == attributeList.size()) : "\u8bcd\u8bed\u5217\u8868\u4e0e\u5c5e\u6027\u5217\u8868\u4e0d\u7b49\u957f";
                }
                offset[0] = end;
            }
        });
        if (offset[0] != sentence.length()) {
            this.segmentAfterRule(sentence.substring(offset[0]), normalized.substring(offset[0]), wordList);
        }
    }

    /**
     * Segments {@code sentence} after normalizing it with {@code CharTable.convert}.
     *
     * @param sentence the text to segment
     * @return the words, taken from the original (non-normalized) text
     */
    @Override
    public List<String> segment(String sentence) {
        return this.segment(sentence, CharTable.convert(sentence));
    }

    /**
     * Delegates to the configured {@link NERecognizer}.
     *
     * @throws NullPointerException if no recognizer is configured
     */
    @Override
    public String[] recognize(String[] wordArray, String[] posArray) {
        return this.neRecognizer.recognize(wordArray, posArray);
    }

    /**
     * Delegates to the configured {@link POSTagger}.
     *
     * @throws NullPointerException if no tagger is configured
     */
    @Override
    public String[] tag(String ... words) {
        return this.posTagger.tag(words);
    }

    /**
     * Delegates to the configured {@link POSTagger}.
     *
     * @throws NullPointerException if no tagger is configured
     */
    @Override
    public String[] tag(List<String> wordList) {
        return this.posTagger.tag(wordList);
    }

    /**
     * Returns the tag set of the configured {@link NERecognizer}.
     *
     * @throws NullPointerException if no recognizer is configured
     */
    @Override
    public NERTagSet getNERTagSet() {
        return this.neRecognizer.getNERTagSet();
    }

    /**
     * Runs segmentation and, depending on which components are present, tagging and NER.
     *
     * <ul>
     *   <li>No tagger: every word is returned with a {@code null} label.</li>
     *   <li>Tagger only: every word is returned with its tag, dictionary attributes taking
     *       precedence over the tagger.</li>
     *   <li>Tagger and recognizer: consecutive words that belong to one entity are wrapped in a
     *       {@link CompoundWord}.</li>
     * </ul>
     *
     * @param sentence the text to analyze
     * @return the analyzed sentence; empty for empty input or when segmentation yields no word
     */
    @Override
    public Sentence analyze(String sentence) {
        if (sentence.isEmpty()) {
            return new Sentence(Collections.<IWord>emptyList());
        }
        String normalized = CharTable.convert(sentence);
        List<String> wordList = new LinkedList<String>();
        List<CoreDictionary.Attribute> attributeList = this.segmentWithAttribute(sentence, normalized, wordList);
        List<IWord> termList = new ArrayList<IWord>(wordList.size());
        if (wordList.isEmpty()) {
            // Nothing to tag; also protects the index-0 accesses of the NER branch.
            return new Sentence(termList);
        }

        if (this.posTagger == null) {
            for (String word : wordList) {
                termList.add(new Word(word, null));
            }
            return new Sentence(termList);
        }

        // The models see the normalized text, the output carries the original text.
        String[] wordArray = AbstractLexicalAnalyzer.normalizedWords(wordList, normalized);
        String[] posArray = this.tag(wordArray);

        if (this.neRecognizer == null) {
            this.overwriteTag(attributeList, posArray);
            wordList.toArray(wordArray);
            for (int i = 0; i < wordArray.length; ++i) {
                termList.add(new Word(wordArray[i], posArray[i]));
            }
            return new Sentence(termList);
        }

        // NER runs on the tagger's own output; dictionary tags are applied afterwards.
        String[] nerArray = this.neRecognizer.recognize(wordArray, posArray);
        this.overwriteTag(attributeList, posArray);
        wordList.toArray(wordArray);
        NERTagSet tagSet = this.getNERTagSet();

        List<Word> pending = new LinkedList<Word>();
        pending.add(new Word(wordArray[0], posArray[0]));
        String prePos = posArray[0];
        for (int i = 1; i < nerArray.length; ++i) {
            char tag = nerArray[i].charAt(0);
            boolean standalone = tag == tagSet.O_TAG_CHAR || tag == tagSet.S_TAG_CHAR;
            if (standalone || tag == tagSet.B_TAG_CHAR) {
                // A new unit starts here, so whatever was collected so far is complete.
                termList.add(pending.size() > 1 ? new CompoundWord(pending, prePos) : (IWord)pending.get(0));
                pending = new ArrayList<Word>();
            }
            pending.add(new Word(wordArray[i], posArray[i]));
            prePos = standalone ? posArray[i] : NERTagSet.posOf(nerArray[i]);
        }
        if (!pending.isEmpty()) {
            termList.add(pending.size() > 1 ? new CompoundWord(pending, prePos) : (IWord)pending.get(0));
        }
        return new Sentence(termList);
    }

    /**
     * Builds the array handed to the tagger and recognizer: the same word boundaries as
     * {@code wordList}, but cut out of the normalized text.
     */
    private static String[] normalizedWords(List<String> wordList, String normalized) {
        String[] wordArray = new String[wordList.size()];
        int offset = 0;
        int id = 0;
        for (String word : wordList) {
            wordArray[id++] = normalized.substring(offset, offset + word.length());
            offset += word.length();
        }
        return wordArray;
    }

    /**
     * Replaces entries of {@code posArray} with the first nature of the attribute at the same
     * position, wherever that attribute is not {@code null}.
     *
     * @param attributeList attributes aligned with {@code posArray}, or {@code null} for a no-op
     * @param posArray      tags to patch in place
     */
    private void overwriteTag(List<CoreDictionary.Attribute> attributeList, String[] posArray) {
        if (attributeList == null) {
            return;
        }
        int id = 0;
        for (CoreDictionary.Attribute attribute : attributeList) {
            if (attribute != null) {
                posArray[id] = attribute.nature[0].toString();
            }
            ++id;
        }
    }

    /**
     * Segments {@code sentence} using an already normalized copy of it.
     *
     * @param sentence   the original text
     * @param normalized the normalized text, aligned character by character with {@code sentence}
     * @return the words
     */
    public List<String> segment(String sentence, String normalized) {
        LinkedList<String> wordList = new LinkedList<String>();
        this.segment(sentence, normalized, wordList);
        return wordList;
    }

    /**
     * Decides whether a custom dictionary hit should be accepted: always when
     * {@code config.forceCustomDictionary} is set, otherwise only for hits of at least four
     * characters whose natures do not start with "nr", "ns" or "nt".
     */
    protected boolean acceptCustomWord(int begin, int end, CoreDictionary.Attribute value) {
        if (this.config.forceCustomDictionary) {
            return true;
        }
        return end - begin >= 4
                && !value.hasNatureStartsWith("nr")
                && !value.hasNatureStartsWith("ns")
                && !value.hasNatureStartsWith("nt");
    }

    /**
     * Not used by this analyzer.
     *
     * @return always {@code null}
     */
    @Override
    protected List<Term> roughSegSentence(char[] sentence) {
        return null;
    }

    /**
     * Segments one sentence into {@link Term}s, applying tagging, NER and name recognition as
     * enabled in the configuration.
     *
     * @param sentence the sentence; it is normalized in place by {@code CharTable.normalization}
     * @return the terms, carrying the original (non-normalized) text
     */
    @Override
    protected List<Term> segSentence(char[] sentence) {
        if (sentence.length == 0) {
            return Collections.emptyList();
        }
        String original = new String(sentence);
        CharTable.normalization(sentence);
        String normalized = new String(sentence);
        List<String> wordList = new LinkedList<String>();
        List<CoreDictionary.Attribute> attributeList = this.segmentWithAttribute(original, normalized, wordList);

        List<Term> termList = new ArrayList<Term>(wordList.size());
        int offset = 0;
        for (String word : wordList) {
            Term term = new Term(word, null);
            term.offset = offset;
            offset += term.length();
            termList.add(term);
        }

        if (this.config.speechTagging) {
            if (this.posTagger != null) {
                termList = this.tagWithModel(wordList, normalized, attributeList, termList);
            } else {
                // No model available: look the word up in the core dictionary, default to noun.
                for (Term term : termList) {
                    CoreDictionary.Attribute attribute = CoreDictionary.get(term.word);
                    term.nature = attribute != null ? attribute.nature[0] : Nature.n;
                }
            }
        }

        if (this.config.translatedNameRecognize || this.config.japaneseNameRecognize) {
            List<Vertex> vertexList = this.toVertexList(termList, true);
            WordNet wordNetOptimum = new WordNet(sentence, vertexList);
            // Both recognizers receive the same word net for "optimum" and "all".
            WordNet wordNetAll = wordNetOptimum;
            if (this.config.translatedNameRecognize) {
                TranslatedPersonRecognition.recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (this.config.japaneseNameRecognize) {
                JapanesePersonRecognition.recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            termList = AbstractLexicalAnalyzer.convert(vertexList, this.config.offset);
        }
        return termList;
    }

    /**
     * Tags {@code termList} in place with the model tagger (dictionary attributes win over the
     * tagger) and, when NER is enabled, returns a new list in which entity words are merged.
     *
     * @return {@code termList} itself, or the merged list when NER ran
     */
    private List<Term> tagWithModel(List<String> wordList, String normalized, List<CoreDictionary.Attribute> attributeList, List<Term> termList) {
        String[] wordArray = AbstractLexicalAnalyzer.normalizedWords(wordList, normalized);
        String[] posArray = this.tag(wordArray);

        Iterator<Term> termIterator = termList.iterator();
        Iterator<CoreDictionary.Attribute> attributeIterator = attributeList == null ? null : attributeList.iterator();
        for (int i = 0; i < posArray.length; ++i) {
            Term term = termIterator.next();
            // The attribute list may be shorter than the word list, hence the hasNext() check.
            CoreDictionary.Attribute attribute = attributeIterator != null && attributeIterator.hasNext() ? attributeIterator.next() : null;
            term.nature = attribute != null ? attribute.nature[0] : Nature.create(posArray[i]);
        }

        if (!this.config.ner || this.neRecognizer == null || wordArray.length == 0) {
            return termList;
        }
        return this.mergeEntities(wordList, wordArray, posArray, attributeList, termList);
    }

    /**
     * Runs NER and rebuilds the term list so that the words of one entity become a single term.
     * In index mode the original terms of a multi-word entity are appended after the merged term
     * when they are at least {@code config.indexMode} characters long.
     */
    private List<Term> mergeEntities(List<String> wordList, String[] wordArray, String[] posArray, List<CoreDictionary.Attribute> attributeList, List<Term> termList) {
        List<Term> childrenList = null;
        Iterator<Term> childIterator = null;
        if (this.config.isIndexMode()) {
            childrenList = new LinkedList<Term>();
            childIterator = termList.iterator();
        }
        List<Term> merged = new ArrayList<Term>(termList.size());

        String[] nerArray = this.recognize(wordArray, posArray);
        // Same order as analyze(): NER sees the tagger output, the result carries dictionary tags.
        // Without this the rebuilt terms would lose the natures taken from dictionary attributes.
        this.overwriteTag(attributeList, posArray);
        wordList.toArray(wordArray);
        NERTagSet tagSet = this.getNERTagSet();

        StringBuilder result = new StringBuilder();
        result.append(wordArray[0]);
        if (childrenList != null) {
            childrenList.add(childIterator.next());
        }
        String prePos = posArray[0];
        int offset = 0;
        for (int i = 1; i < nerArray.length; ++i) {
            char tag = nerArray[i].charAt(0);
            boolean standalone = tag == tagSet.O_TAG_CHAR || tag == tagSet.S_TAG_CHAR;
            if (standalone || tag == tagSet.B_TAG_CHAR) {
                offset = this.appendTerm(merged, result.toString(), prePos, offset, childrenList);
                if (childrenList != null) {
                    childrenList.clear();
                }
                result.setLength(0);
            }
            result.append(wordArray[i]);
            if (childrenList != null) {
                childrenList.add(childIterator.next());
            }
            prePos = standalone ? posArray[i] : NERTagSet.posOf(nerArray[i]);
        }
        if (result.length() != 0) {
            this.appendTerm(merged, result.toString(), prePos, offset, childrenList);
        }
        return merged;
    }

    /**
     * Appends one term at {@code offset} and, when {@code childrenList} holds more than one term,
     * the children that are long enough for index mode.
     *
     * @return the offset just past the appended term
     */
    private int appendTerm(List<Term> termList, String word, String pos, int offset, List<Term> childrenList) {
        Term term = new Term(word, Nature.create(pos));
        term.offset = offset;
        termList.add(term);
        if (childrenList != null && childrenList.size() > 1) {
            for (Term child : childrenList) {
                if (child.length() >= this.config.indexMode) {
                    termList.add(child);
                }
            }
        }
        return offset + term.length();
    }

    /**
     * Emits the piece {@code [start, end)}: pieces of the Chinese character type go through the
     * segmenter, every other piece becomes a single word.
     */
    private void pushPiece(String sentence, String normalized, int start, int end, byte preType, List<String> wordList) {
        if (preType == TYPE_CHINESE) {
            this.segmenter.segment(sentence.substring(start, end), normalized.substring(start, end), wordList);
        } else {
            wordList.add(sentence.substring(start, end));
        }
    }

    /**
     * Segments {@code sentence}. With rule-based segmentation disabled this is a plain delegation
     * to the segmenter. Otherwise the text is first cut wherever the character type changes, with
     * two exceptions for digit runs: a comma or full stop followed by another digit stays inside
     * the number, and a date or time unit character keeps the number attached to the following
     * text.
     *
     * @param sentence   the original text
     * @param normalized the normalized text, aligned character by character with {@code sentence}
     * @param wordList   receives the words
     */
    protected void segmentAfterRule(String sentence, String normalized, List<String> wordList) {
        if (!this.enableRuleBasedSegment) {
            this.segmenter.segment(sentence, normalized, wordList);
            return;
        }
        if (normalized.isEmpty()) {
            // Nothing to split; charAt(0) below would throw.
            return;
        }
        int start = 0;
        int end = 0;
        byte preType = typeTable[normalized.charAt(end)];
        while (++end < normalized.length()) {
            char current = normalized.charAt(end);
            byte curType = typeTable[current];
            if (curType != preType) {
                if (preType == TYPE_NUMBER) {
                    if (DECIMAL_SEPARATORS.indexOf(current) != -1) {
                        // Separator inside a number: skip it without touching preType.
                        if (end + 1 < normalized.length() && typeTable[normalized.charAt(end + 1)] == TYPE_NUMBER) {
                            continue;
                        }
                    } else if (DATE_TIME_UNITS.indexOf(current) != -1) {
                        // Number followed by a unit: let the piece continue with the unit's type.
                        preType = curType;
                        continue;
                    }
                }
                this.pushPiece(sentence, normalized, start, end, preType, wordList);
                start = end;
            }
            preType = curType;
        }
        // The loop only ends with end == normalized.length(), so the last piece is always pending.
        this.pushPiece(sentence, normalized, start, end, preType, wordList);
    }

    /**
     * Segments according to the custom dictionary settings.
     *
     * @return the attributes aligned with {@code wordList}, or {@code null} when the custom
     *         dictionary is disabled
     */
    private List<CoreDictionary.Attribute> segmentWithAttribute(String original, String normalized, List<String> wordList) {
        if (!this.config.useCustomDictionary) {
            this.segmentAfterRule(original, normalized, wordList);
            return null;
        }
        if (this.config.forceCustomDictionary) {
            // Dictionary words are cut out first, the segmenter only sees the gaps.
            List<CoreDictionary.Attribute> attributeList = new LinkedList<CoreDictionary.Attribute>();
            this.segment(original, normalized, wordList, attributeList);
            return attributeList;
        }
        // The segmenter runs first, adjacent words are merged afterwards.
        this.segmentAfterRule(original, normalized, wordList);
        return AbstractLexicalAnalyzer.combineWithCustomDictionary(wordList);
    }

    /**
     * Merges runs of adjacent words that together form a custom dictionary entry, first against
     * {@code CustomDictionary.dat} and then, if present, against {@code CustomDictionary.trie}.
     * For each start position the longest matching run wins.
     *
     * @param vertexList the words; rewritten in place with the merged words
     * @return the attribute of each resulting word, {@code null} for words without a match
     */
    protected static List<CoreDictionary.Attribute> combineWithCustomDictionary(List<String> vertexList) {
        // Merged-away words are marked with null and dropped when the list is rebuilt.
        String[] wordNet = new String[vertexList.size()];
        vertexList.toArray(wordNet);
        CoreDictionary.Attribute[] attributeArray = new CoreDictionary.Attribute[wordNet.length];
        int length = wordNet.length;

        DoubleArrayTrie<CoreDictionary.Attribute> dat = CustomDictionary.dat;
        for (int i = 0; i < length; ++i) {
            int state = dat.transition(wordNet[i], 1);
            if (state <= 0) {
                continue;
            }
            int end = i + 1;
            CoreDictionary.Attribute value = dat.output(state);
            for (int to = i + 1; to < length; ++to) {
                state = dat.transition(wordNet[to], state);
                if (state < 0) {
                    break;
                }
                CoreDictionary.Attribute output = dat.output(state);
                if (output != null) {
                    value = output;
                    end = to + 1;
                }
            }
            if (value != null) {
                AbstractLexicalAnalyzer.combineWords(wordNet, i, end, attributeArray, value);
                i = end - 1;
            }
        }

        if (CustomDictionary.trie != null) {
            // Starts at 0 like the pass above: the list has no leading sentinel, so starting at 1
            // would make the first word unmatchable here.
            for (int i = 0; i < length; ++i) {
                if (wordNet[i] == null) {
                    continue;
                }
                BaseNode<CoreDictionary.Attribute> state = CustomDictionary.trie.transition(wordNet[i], 0);
                if (state == null) {
                    continue;
                }
                int end = i + 1;
                CoreDictionary.Attribute value = state.getValue();
                for (int to = i + 1; to < length; ++to) {
                    if (wordNet[to] == null) {
                        continue;
                    }
                    state = state.transition(wordNet[to], 0);
                    if (state == null) {
                        break;
                    }
                    if (state.getValue() != null) {
                        value = state.getValue();
                        end = to + 1;
                    }
                }
                if (value != null) {
                    AbstractLexicalAnalyzer.combineWords(wordNet, i, end, attributeArray, value);
                    i = end - 1;
                }
            }
        }

        vertexList.clear();
        List<CoreDictionary.Attribute> attributeList = new LinkedList<CoreDictionary.Attribute>();
        for (int i = 0; i < wordNet.length; ++i) {
            if (wordNet[i] != null) {
                vertexList.add(wordNet[i]);
                attributeList.add(attributeArray[i]);
            }
        }
        return attributeList;
    }

    /**
     * Concatenates the non-null words in {@code [start, end)} into {@code wordNet[start]}, nulls
     * the other slots, and records {@code value} as the attribute of the result. A single-word
     * range only gets its attribute recorded.
     */
    private static void combineWords(String[] wordNet, int start, int end, CoreDictionary.Attribute[] attributeArray, CoreDictionary.Attribute value) {
        if (start + 1 != end) {
            StringBuilder sbTerm = new StringBuilder();
            for (int j = start; j < end; ++j) {
                if (wordNet[j] != null) {
                    sbTerm.append(wordNet[j]);
                    wordNet[j] = null;
                }
            }
            wordNet[start] = sbTerm.toString();
        }
        attributeArray[start] = value;
    }

    /**
     * Switches the character-type pre-segmentation of {@link #segmentAfterRule} on or off.
     *
     * @param enableRuleBasedSegment {@code true} to enable it
     * @return this analyzer, for chaining
     */
    public AbstractLexicalAnalyzer enableRuleBasedSegment(boolean enableRuleBasedSegment) {
        this.enableRuleBasedSegment = enableRuleBasedSegment;
        return this;
    }

    static {
        // Start from the shared table, then reclassify Chinese numerals and the middle dot so that
        // the rule-based splitter leaves them to the statistical segmenter.
        System.arraycopy(CharType.type, 0, typeTable, 0, typeTable.length);
        for (char c : "\u96f6\u25cb\u3007\u4e00\u4e8c\u4e24\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u5eff\u767e\u5343\u4e07\u4ebf\u58f9\u8d30\u53c1\u8086\u4f0d\u9646\u67d2\u634c\u7396\u62fe\u4f70\u4edf".toCharArray()) {
            AbstractLexicalAnalyzer.typeTable[c] = TYPE_CHINESE;
        }
        AbstractLexicalAnalyzer.typeTable[CharTable.convert((char)'\u00b7')] = TYPE_CHINESE;
    }
}

