package org.apache.lucene.analysis.kuromoji;

import java.io.Reader;
import java.text.BreakIterator;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.kuromoji.tokenattributes.BaseFormAttribute;
import org.apache.lucene.analysis.kuromoji.tokenattributes.InflectionAttribute;
import org.apache.lucene.analysis.kuromoji.tokenattributes.PartOfSpeechAttribute;
import org.apache.lucene.analysis.kuromoji.tokenattributes.ReadingAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;

/* loaded from: input_file:WEB-INF/lib/lucene-analyzers-kuromoji-4.0.1.jar:org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.class */
/**
 * Tokenizer that passes spans of the input buffer to a {@link Segmenter} and exposes
 * every resulting {@link Token} through Lucene token attributes.
 *
 * <p>A clone of a Japanese-locale sentence {@link BreakIterator} is supplied to
 * {@link SegmentingTokenizerBase}; the superclass is presumably what drives
 * {@link #setNextSentence(int, int)} and {@link #incrementWord()} (its code is not
 * part of this file).
 */
public final class KuromojiTokenizer extends SegmentingTokenizerBase {
    /** Sentence iterator prototype for {@link Locale#JAPAN}; each instance hands a clone to the superclass. */
    private static final BreakIterator proto = BreakIterator.getSentenceInstance(Locale.JAPAN);

    // Attributes are registered in declaration order, right after the superclass constructor returns.
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final BaseFormAttribute basicFormAtt = addAttribute(BaseFormAttribute.class);
    private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
    private final ReadingAttribute readingAtt = addAttribute(ReadingAttribute.class);
    private final InflectionAttribute inflectionAtt = addAttribute(InflectionAttribute.class);

    /** Produces the token list for each span handed to {@link #setNextSentence(int, int)}. */
    private final Segmenter segmenter;

    /** Tokens of the most recently segmented span; assigned by {@link #setNextSentence(int, int)}. */
    private List<Token> tokens;

    /** Index into {@link #tokens} of the next token to emit. */
    private int tokenIndex;

    /** Start index, within the inherited buffer, of the most recently segmented span. */
    private int sentenceStart;

    /**
     * Creates a tokenizer backed by a freshly constructed {@link Segmenter}.
     *
     * @param reader source of the text to tokenize
     */
    public KuromojiTokenizer(Reader reader) {
        this(new Segmenter(), reader);
    }

    /**
     * Creates a tokenizer that delegates segmentation to the given {@link Segmenter}.
     *
     * @param segmenter segmenter used to split each span into tokens
     * @param reader    source of the text to tokenize
     */
    public KuromojiTokenizer(Segmenter segmenter, Reader reader) {
        super(reader, (BreakIterator) proto.clone());
        this.segmenter = segmenter;
    }

    /**
     * Segments the span {@code [i, i2)} of the inherited buffer and rewinds the token cursor.
     *
     * @param i  index in the buffer where the span starts
     * @param i2 index in the buffer just past the end of the span
     */
    @Override
    protected void setNextSentence(int i, int i2) {
        sentenceStart = i;
        // The segmenter receives a base offset of 0; positions are shifted in incrementWord().
        tokens = segmenter.doTokenize(0, buffer, i, i2 - i, true);
        tokenIndex = 0;
    }

    /**
     * Emits the next token of the current span, if there is one.
     *
     * @return {@code true} when the attributes were populated with a token,
     *         {@code false} once every token of the current span has been emitted
     */
    @Override
    protected boolean incrementWord() {
        if (tokenIndex == tokens.size()) {
            return false;
        }

        final Token current = tokens.get(tokenIndex);
        final int relativeStart = current.getPosition();
        final int termLength = current.getLength();

        clearAttributes();

        // Term text is copied from the buffer at the token's position relative to the span start.
        final int bufferStart = sentenceStart + relativeStart;
        termAtt.copyBuffer(buffer, bufferStart, termLength);

        // Offsets additionally include the inherited `offset` field before correction.
        final int startOffset = offset + sentenceStart + relativeStart;
        final int endOffset = startOffset + termLength;
        offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));

        basicFormAtt.setToken(current);
        posAtt.setToken(current);
        readingAtt.setToken(current);
        inflectionAtt.setToken(current);

        tokenIndex++;
        return true;
    }
}
