/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.mining.word2vec;

import com.hankcs.hanlp.mining.word2vec.Config;
import com.hankcs.hanlp.mining.word2vec.Corpus;
import com.hankcs.hanlp.mining.word2vec.TrainingCallback;
import com.hankcs.hanlp.mining.word2vec.Utils;
import com.hankcs.hanlp.mining.word2vec.VocabWord;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.TreeMap;

public class TextFileCorpus
extends Corpus {
    private static final int VOCAB_MAX_SIZE = 30000000;
    private int minReduce = 1;
    private BufferedReader raf = null;
    private DataOutputStream cache;
    String[] wordsBuffer = new String[0];
    int wbp = this.wordsBuffer.length;

    public TextFileCorpus(Config config) throws IOException {
        super(config);
    }

    @Override
    public void shutdown() throws IOException {
        Utils.closeQuietly(this.raf);
        this.wordsBuffer = null;
    }

    @Override
    public void rewind(int numThreads, int id) throws IOException {
        super.rewind(numThreads, id);
    }

    @Override
    public String nextWord() throws IOException {
        return this.readWord(this.raf);
    }

    void reduceVocab() {
        this.table = new int[this.vocabSize];
        int j = 0;
        for (int i = 0; i < this.vocabSize; ++i) {
            if (this.vocab[i].cn > this.minReduce) {
                this.vocab[j].cn = this.vocab[i].cn;
                this.vocab[j].word = this.vocab[i].word;
                this.table[((Integer)this.vocabIndexMap.get((Object)this.vocab[j].word)).intValue()] = j;
                ++j;
                continue;
            }
            this.table[((Integer)this.vocabIndexMap.get((Object)this.vocab[j].word)).intValue()] = -4;
        }
        try {
            this.cache.close();
            File fixingFile = new File(this.cacheFile.getAbsolutePath() + ".fixing");
            this.cache = new DataOutputStream(new FileOutputStream(fixingFile));
            DataInputStream oldCache = new DataInputStream(new FileInputStream(this.cacheFile));
            while (oldCache.available() >= 4) {
                int id = this.table[oldCache.readInt()];
                if (id == -4) continue;
                this.cache.writeInt(id);
            }
            oldCache.close();
            this.cache.close();
            if (!fixingFile.renameTo(this.cacheFile)) {
                throw new RuntimeException(String.format("moving %s to %s failed", fixingFile.getAbsolutePath(), this.cacheFile.getName()));
            }
            this.cache = new DataOutputStream(new FileOutputStream(this.cacheFile));
        }
        catch (IOException e) {
            throw new RuntimeException(String.format("failed to adjust cache file", e));
        }
        this.table = null;
        this.vocabSize = j;
        this.vocabIndexMap.clear();
        for (int i = 0; i < this.vocabSize; ++i) {
            this.vocabIndexMap.put(this.vocab[i].word, i);
        }
        ++this.minReduce;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void learnVocab() throws IOException {
        this.vocab = new VocabWord[this.vocabMaxSize];
        this.vocabIndexMap = new TreeMap();
        this.vocabSize = 0;
        File trainFile = new File(this.config.getInputFile());
        BufferedReader raf = null;
        FileInputStream fileInputStream = null;
        this.cache = null;
        this.vocabSize = 0;
        TrainingCallback callback = this.config.getCallback();
        try {
            String word;
            fileInputStream = new FileInputStream(trainFile);
            raf = new BufferedReader(new InputStreamReader((InputStream)fileInputStream, this.encoding));
            this.cacheFile = File.createTempFile(String.format("corpus_%d", System.currentTimeMillis()), ".bin");
            this.cache = new DataOutputStream(new FileOutputStream(this.cacheFile));
            while ((word = this.readWord(raf)) != null || !this.eoc) {
                int idx;
                ++this.trainWords;
                if (this.trainWords % 100000 == 0) {
                    if (callback == null) {
                        System.err.printf("%c%.2f%% %dK", 13, Float.valueOf((1.0f - (float)fileInputStream.available() / (float)trainFile.length()) * 100.0f), this.trainWords / 1000);
                        System.err.flush();
                    } else {
                        callback.corpusLoading((1.0f - (float)fileInputStream.available() / (float)trainFile.length()) * 100.0f);
                    }
                }
                if ((idx = this.searchVocab(word)) == -1) {
                    idx = this.addWordToVocab(word);
                    this.vocab[idx].cn = 1;
                } else {
                    ++this.vocab[idx].cn;
                }
                if ((double)this.vocabSize > 2.1E7) {
                    this.reduceVocab();
                    idx = this.searchVocab(word);
                }
                this.cache.writeInt(idx);
            }
        }
        catch (Throwable throwable) {
            Utils.closeQuietly(fileInputStream);
            Utils.closeQuietly(raf);
            Utils.closeQuietly(this.cache);
            System.err.println();
            throw throwable;
        }
        Utils.closeQuietly(fileInputStream);
        Utils.closeQuietly(raf);
        Utils.closeQuietly(this.cache);
        System.err.println();
        if (callback == null) {
            System.err.printf("%c100%% %dK", 13, this.trainWords / 1000);
            System.err.flush();
        } else {
            callback.corpusLoading(100.0f);
            callback.corpusLoaded(this.vocabSize, this.trainWords, this.trainWords);
        }
    }

    String readWord(BufferedReader raf) throws IOException {
        while (this.wbp >= this.wordsBuffer.length) {
            String line = raf.readLine();
            if (line == null) {
                this.eoc = true;
                return null;
            }
            if ((line = line.trim()).length() == 0) continue;
            this.cache.writeInt(-3);
            this.wordsBuffer = line.split("\\s+");
            this.wbp = 0;
            this.eoc = false;
        }
        return this.wordsBuffer[this.wbp++];
    }
}

