package com.topologi.diffx.load.text;

import com.topologi.diffx.config.TextGranularity;
import com.topologi.diffx.config.WhiteSpaceProcessing;
import com.topologi.diffx.event.TextEvent;
import com.topologi.diffx.event.impl.IgnorableSpaceEvent;
import com.topologi.diffx.event.impl.SpaceEvent;
import com.topologi.diffx.event.impl.WordEvent;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:WEB-INF/lib/docx4j-6.0.1.jar:com/topologi/diffx/load/text/TokenizerByWord.class */
/**
 * Tokenizer that splits text into word events separated by runs of white space.
 *
 * <p>A "word" is any maximal run of characters not matched by the regular expression
 * {@code \s+}; each maximal run that is matched becomes a single white space event, unless
 * the tokenizer was configured with {@link WhiteSpaceProcessing#IGNORE}, in which case white
 * space runs are dropped from the output.
 *
 * <p>Events are cached per instance by their text, so tokenizing the same word or the same
 * white space run again returns the same event object. The cache is a plain {@link HashMap}
 * that is written to during {@link #tokenize(CharSequence)}, so an instance must not be
 * shared between threads without external synchronization.
 */
public final class TokenizerByWord implements TextTokenizer {

    /** Matches one maximal run of white space; compiled once because it never changes. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Per-instance cache of the events already created, keyed by their exact text. */
    private final Map<String, TextEvent> recycling = new HashMap<String, TextEvent>();

    /** How white space runs are reported (or skipped) by this tokenizer; never {@code null}. */
    private final WhiteSpaceProcessing whitespace;

    /**
     * Creates a tokenizer using the given white space policy.
     *
     * @param whiteSpaceProcessing how white space runs must be handled
     * @throws NullPointerException if {@code whiteSpaceProcessing} is {@code null}
     */
    public TokenizerByWord(WhiteSpaceProcessing whiteSpaceProcessing) {
        if (whiteSpaceProcessing == null) {
            throw new NullPointerException("the white space processing must be specified.");
        }
        this.whitespace = whiteSpaceProcessing;
    }

    /**
     * Splits the given text into word and white space events, in document order.
     *
     * @param charSequence the text to tokenize; may be {@code null}
     * @return {@code null} if {@code charSequence} is {@code null}, an empty list if it is
     *         empty, otherwise the list of events found in the text
     */
    @Override // com.topologi.diffx.load.text.TextTokenizer
    public List<TextEvent> tokenize(CharSequence charSequence) {
        if (charSequence == null) {
            return null;
        }
        if (charSequence.length() == 0) {
            return Collections.emptyList();
        }
        List<TextEvent> events = new ArrayList<TextEvent>();
        Matcher matcher = WHITESPACE.matcher(charSequence);
        // Index of the first character not yet emitted as part of an event.
        int pending = 0;
        while (matcher.find()) {
            // Anything between the previous white space run and this one is a word.
            if (pending != matcher.start()) {
                events.add(getWordEvent(charSequence.subSequence(pending, matcher.start()).toString()));
            }
            if (this.whitespace != WhiteSpaceProcessing.IGNORE) {
                events.add(getSpaceEvent(matcher.group()));
            }
            pending = matcher.end();
        }
        // Trailing word after the last white space run (or the whole text if there was none).
        if (pending != charSequence.length()) {
            events.add(getWordEvent(charSequence.subSequence(pending, charSequence.length()).toString()));
        }
        return events;
    }

    /**
     * Returns the granularity of this tokenizer.
     *
     * <p>The method name is spelled this way by the {@code TextTokenizer} interface.
     *
     * @return always {@link TextGranularity#WORD}
     */
    @Override // com.topologi.diffx.load.text.TextTokenizer
    public TextGranularity granurality() {
        return TextGranularity.WORD;
    }

    /**
     * Returns the cached event for the given word, creating and caching a {@link WordEvent}
     * on first use.
     *
     * @param str the text of the word
     * @return the event for that word
     */
    private TextEvent getWordEvent(String str) {
        TextEvent event = this.recycling.get(str);
        if (event == null) {
            event = new WordEvent(str);
            this.recycling.put(str, event);
        }
        return event;
    }

    /**
     * Returns the cached event for the given white space run, creating and caching it on
     * first use.
     *
     * <p>With {@link WhiteSpaceProcessing#PRESERVE} the event is an
     * {@link IgnorableSpaceEvent}; for any other policy it is obtained from
     * {@link SpaceEvent#getInstance(String)}.
     *
     * @param str the exact white space characters
     * @return the event for that white space run
     */
    private TextEvent getSpaceEvent(String str) {
        TextEvent event = this.recycling.get(str);
        if (event == null) {
            if (this.whitespace == WhiteSpaceProcessing.PRESERVE) {
                event = new IgnorableSpaceEvent(str);
            } else {
                event = SpaceEvent.getInstance(str);
            }
            this.recycling.put(str, event);
        }
        return event;
    }
}
