/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.sentiment;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBEscapingProcessor;
import edu.stanford.nlp.trees.LabeledScoredTreeNode;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

public class ReadSentimentDataset {
    /** Redwood logging channel for this class; never reassigned, hence final. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(ReadSentimentDataset.class);

    /** Maps a tree node to the string value of its label. */
    static final Function<Tree, String> TRANSFORM_TREE_TO_WORD = tree -> tree.label().value();
    /**
     * Rewrites a bare parenthesis token into its bracket placeholder:
     * {@code "("} becomes {@code "-LRB-"} and {@code ")"} becomes {@code "-RRB-"}.
     * Every other token is returned unchanged.
     */
    static final Function<String, String> TRANSFORM_PARENS = token -> {
        switch (token) {
            case "(":
                return "-LRB-";
            case ")":
                return "-RRB-";
            default:
                return token;
        }
    };
    static final TregexPattern[] tregexPatterns = new TregexPattern[]{TregexPattern.compile("__=single <1 (__ < /^-LRB-$/) <2 (__ <... { (__ < /^[a-zA-Z]$/=letter) ; (__ < /^-RRB-$/) }) > (__ <2 =single <1 (__=useless <<- (__=word !< __)))"), TregexPattern.compile("__=single <1 (__ < /^-LRB-$/) <2 (__ <... { (__ < /^[aA]$/=letter) ; (__ < /^-RRB-$/) }) > (__ <1 =single <2 (__=useless <<, /^n$/=word))"), TregexPattern.compile("__=single <1 (__ < /^-LRB-$/) <2 (__=A <... { (__ < /^[aA]$/=letter) ; (__=paren < /^-RRB-$/) })"), TregexPattern.compile("__ <1 (__ <<- (/^(?i:provide)$/=provide !<__)) <2 (__ <<, (__=s > __=useless <... { (__ <: -LRB-) ; (__ <1 (__ <: s)) } ))"), TregexPattern.compile("__=single <1 (__ < /^-LRB-$/) <2 (__ <... { (__ < /^[a-zA-Z]$/=letter) ; (__ < /^-RRB-$/) }) > (__ <1 =single <2 (__=useless <<, (__=word !< __)))"), TregexPattern.compile("-LRB-=lrb !, __ : (__=ltop > __ <<, =lrb <<- (-RRB-=rrb > (__ > __=rtop)) !<< (-RRB- !== =rrb))"), TregexPattern.compile("__=top <1 (__=f1 < f) <2 (__=f2 <... { (__ < /^[*\\\\]+$/) ; (__ < ed) })"), TregexPattern.compile("__=top <1 (__=f1 <1 (__ < don=do) <2 (__ < /^[']$/=apos)) <2 (__=wrong < t)"), TregexPattern.compile("-LRB-=lrb !, __ .. (-RRB-=rrb !< __ !.. -RRB-)"), TregexPattern.compile("-LRB-=lrb . and|Haneke|is|Evans|Harmon|Harris|its|it|Aniston|headbanger|Testud|but|frames|yet|Denis|DeNiro|sinks|screenwriter|Cho|meditation|Watts|that|the|this|Madonna|Ahola|Franco|Hopkins|Crudup|writer-director|Diggs|very|Crane|Frei|Reno|Jones|Quills|Bobby|Hill|Kim|subjects|Wang|Jaglom|Vega|Sabara|Sade|Goldbacher|too|being|opening=last : (=last . -RRB-=rrb)"), TregexPattern.compile("-LRB-=lrb . (__=n1 !< __ . (__=n2 !< __ . -RRB-=rrb)) : (=n1 (== Besson|Kissinger|Godard|Seagal|jaglon|It|it|Tsai|Nelson|Rifkan|Shakespeare|Solondz|Madonna|Herzog|Witherspoon|Woo|Eyre|there|Moore|Ricci|Seinfeld . (=n2 == /^'s$/)) | (== Denis|Skins|Spears|Assayas . (=n2 == /^'$/)) | (== Je-Gyu . (=n2 == is)) | (== the . 
(=n2 == leads|film|story|characters)) | (== Monsoon . (=n2 == Wedding)) | (== De . (=n2 == Niro)) | (== Roman . (=n2 == Coppola)) | (== than . (=n2 == Leon)) | (==Colgate . (=n2 == /^U.$/)) | (== teen . (=n2 == comedy)) | (== a . (=n2 == remake)) | (== Powerpuff . (=n2 == Girls)) | (== Woody . (=n2 == Allen)))"), TregexPattern.compile("-LRB-=lrb . (__=n1 !< __ . (__=n2 !< __ . (__=n3 !< __ . -RRB-=rrb))) : (=n1 [ (== the . (=n2 == characters . (=n3 == /^'$/))) | (== the . (=n2 == movie . (=n3 == /^'s$/))) | (== of . (=n2 == middle-aged . (=n3 == romance))) | (== Jack . (=n2 == Nicholson . (=n3 == /^'s$/))) | (== De . (=n2 == Palma . (=n3 == /^'s$/))) | (== Clara . (=n2 == and . (=n3 == Paul))) | (== Sex . (=n2 == and . (=n3 == Luc\u00eda))) ])"), TregexPattern.compile("/^401$/ > (__ > __=top)"), TregexPattern.compile("by . (all > (__=all > __=allgp) . (means > (__=means > __=meansgp))) : (=allgp !== =meansgp)"), TregexPattern.compile("/^(?:20th|21st)$/ . Century=century"), TregexPattern.compile("__ <: (__=unitary < __)"), TregexPattern.compile("/^[1]$/=label <: /^(?i:protagonist)$/")};
    static final TsurgeonPattern[] tsurgeonPatterns = new TsurgeonPattern[]{Tsurgeon.parseOperation("[relabel word /^.*$/={word}={letter}/] [prune single] [excise useless useless]"), Tsurgeon.parseOperation("[relabel word /^.*$/={letter}n/] [prune single] [excise useless useless]"), Tsurgeon.parseOperation("[excise single A] [prune paren]"), Tsurgeon.parseOperation("[relabel provide /^.*$/={provide}s/] [prune s] [excise useless useless]"), Tsurgeon.parseOperation("[relabel word /^.*$/={letter}={word}/] [prune single] [excise useless useless]"), Tsurgeon.parseOperation("[prune lrb] [prune rrb] [excise ltop ltop] [excise rtop rtop]"), Tsurgeon.parseOperation("replace top (0 fucked)"), Tsurgeon.parseOperation("[prune wrong] [relabel do do] [relabel apos /^.*$/n={apos}t/] [excise top top]"), Tsurgeon.parseOperation("[prune rrb] [prune lrb]"), Tsurgeon.parseOperation("[prune rrb] [prune lrb]"), Tsurgeon.parseOperation("[prune rrb] [prune lrb]"), Tsurgeon.parseOperation("[prune rrb] [prune lrb]"), Tsurgeon.parseOperation("replace top (2 (2 401k) (2 statement))"), Tsurgeon.parseOperation("[move means $- all] [excise meansgp meansgp] [createSubtree 2 all means]"), Tsurgeon.parseOperation("relabel century century"), Tsurgeon.parseOperation("[excise unitary unitary]"), Tsurgeon.parseOperation("relabel label /^.*$/2/")};

    /** Private and empty: every member of this class is static, so no instance is ever created. */
    private ReadSentimentDataset() {
    }

    /**
     * Builds a sentiment-labeled tree for one sentence.
     *
     * <p>Node {@code i} for {@code i < sentence.size()} is a preterminal holding
     * word {@code i}; the remaining nodes up to the largest parent index are
     * internal nodes. Nodes are linked according to {@code parentPointers}, every
     * node is labeled with a sentiment class, the words are escaped, and finally
     * each tregex/tsurgeon pair is applied to the root in array order.
     *
     * @param parentPointers parent index of each node; {@code -1} marks the root
     * @param sentence the words of the sentence, in order
     * @param phraseIds maps a phrase (list of words) to its phrase id
     * @param sentimentScores maps a phrase id to its sentiment score
     * @param escaper applied to the value of every word leaf
     * @return the root of the converted tree after all tsurgeon operations
     * @throws RuntimeException if there are two roots or none, or if a phrase id
     *     or sentiment score cannot be found for some node
     */
    public static Tree convertTree(List<Integer> parentPointers, List<String> sentence, Map<List<String>, Integer> phraseIds, Map<Integer, Double> sentimentScores, PTBEscapingProcessor escaper) {
        int maxNode = 0;
        for (Integer parent : parentPointers) {
            maxNode = Math.max(maxNode, parent);
        }

        // One preterminal (with its word leaf) per token, then bare internal nodes.
        Tree[] subtrees = new Tree[maxNode + 1];
        for (int i = 0; i < sentence.size(); ++i) {
            CoreLabel word = new CoreLabel();
            word.setValue(sentence.get(i));
            Tree leaf = new LabeledScoredTreeNode(word);
            subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
            subtrees[i].addChild(leaf);
        }
        for (int i = sentence.size(); i <= maxNode; ++i) {
            subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
        }

        // Link children to parents; the visiting order here fixes the child order.
        boolean[] connected = new boolean[maxNode + 1];
        Tree root = null;
        for (int index = 0; index < parentPointers.size(); ++index) {
            if (parentPointers.get(index) == -1) {
                if (root != null) {
                    throw new RuntimeException("Found two roots for sentence " + sentence);
                }
                root = subtrees[index];
                continue;
            }
            ReadSentimentDataset.connect(parentPointers, subtrees, connected, index);
        }

        // Label every node with the sentiment class of the phrase it spans.
        for (int i = 0; i <= maxNode; ++i) {
            List<Tree> leaves = subtrees[i].getLeaves();
            List<String> words = CollectionUtils.transformAsList(leaves, TRANSFORM_TREE_TO_WORD);
            // Try the -LRB-/-RRB- spelling first, then the raw words.
            Integer phraseId = phraseIds.get(CollectionUtils.transformAsList(words, TRANSFORM_PARENS));
            if (phraseId == null) {
                phraseId = phraseIds.get(words);
            }
            if (phraseId == null) {
                throw new RuntimeException("Could not find phrase id for phrase " + words + " in sentence " + sentence);
            }
            Double score = sentimentScores.get(phraseId);
            if (score == null) {
                throw new RuntimeException("Could not find sentiment score for phrase id " + phraseId);
            }
            // floor(score * 5) gives classes 0..5; 5 is folded into class 4.
            int classLabel = Math.min(4, Math.round((float)Math.floor(score * 5.0)));
            subtrees[i].label().setValue(Integer.toString(classLabel));
        }

        // Escape the words only after the phrase lookup, which uses the raw words.
        for (int i = 0; i < sentence.size(); ++i) {
            Tree leaf = subtrees[i].children()[0];
            leaf.label().setValue(escaper.escapeString(leaf.label().value()));
        }

        // Fail with a clear message rather than handing a null tree to Tsurgeon.
        if (root == null) {
            throw new RuntimeException("Found no root for sentence " + sentence);
        }
        for (int i = 0; i < tregexPatterns.length; ++i) {
            root = Tsurgeon.processPattern(tregexPatterns[i], tsurgeonPatterns[i], root);
        }
        return root;
    }

    /**
     * Attaches {@code subtrees[index]} to its parent and then keeps walking up
     * the parent chain, attaching each node on the way. The walk stops at the
     * first node that is already connected or whose parent pointer is negative.
     *
     * @param parentPointers parent index of each node; negative means no parent
     * @param subtrees the node for each index
     * @param connected marks nodes already attached to their parent; updated here
     * @param index the node to start from
     */
    private static void connect(List<Integer> parentPointers, Tree[] subtrees, boolean[] connected, int index) {
        int node = index;
        while (!connected[node]) {
            int parent = parentPointers.get(node);
            if (parent < 0) {
                break;
            }
            subtrees[parent].addChild(subtrees[node]);
            connected[node] = true;
            node = parent;
        }
    }

    /**
     * Writes the selected trees to {@code filename}, one tree per line, in the
     * order given by {@code treeIds}.
     *
     * <p>The output is encoded as UTF-8, the same encoding used to read the
     * input files, instead of the platform default. Both streams are closed even
     * when a write fails.
     *
     * @param filename path of the file to create or overwrite
     * @param trees all converted trees
     * @param treeIds indices into {@code trees} of the trees to write
     * @throws RuntimeIOException wrapping any {@link IOException} raised while writing
     */
    private static void writeTrees(String filename, List<Tree> trees, List<Integer> treeIds) {
        try (FileOutputStream fos = new FileOutputStream(filename);
             BufferedWriter bout = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"))) {
            for (Integer id : treeIds) {
                bout.write(trees.get(id).toString());
                bout.write("\n");
            }
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /*
     * WARNING - void declaration
     */
    public static void main(String[] args) {
        Object id;
        String dictionaryFilename = null;
        String sentimentFilename = null;
        String tokensFilename = null;
        String parseFilename = null;
        String splitFilename = null;
        String trainFilename = null;
        String devFilename = null;
        String testFilename = null;
        int argIndex = 0;
        while (argIndex < args.length) {
            if (args[argIndex].equalsIgnoreCase("-dictionary")) {
                dictionaryFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-sentiment")) {
                sentimentFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-tokens")) {
                tokensFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-parse")) {
                parseFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-split")) {
                splitFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-inputDir") || args[argIndex].equalsIgnoreCase("-inputDirectory")) {
                dictionaryFilename = args[argIndex + 1] + "/dictionary.txt";
                sentimentFilename = args[argIndex + 1] + "/sentiment_labels.txt";
                tokensFilename = args[argIndex + 1] + "/SOStr.txt";
                parseFilename = args[argIndex + 1] + "/STree.txt";
                splitFilename = args[argIndex + 1] + "/datasetSplit.txt";
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-train")) {
                trainFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-dev")) {
                devFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-test")) {
                testFilename = args[argIndex + 1];
                argIndex += 2;
                continue;
            }
            if (args[argIndex].equalsIgnoreCase("-outputDir") || args[argIndex].equalsIgnoreCase("-outputDirectory")) {
                trainFilename = args[argIndex + 1] + "/train.txt";
                devFilename = args[argIndex + 1] + "/dev.txt";
                testFilename = args[argIndex + 1] + "/test.txt";
                argIndex += 2;
                continue;
            }
            log.info("Unknown argument " + args[argIndex]);
            System.exit(2);
        }
        ArrayList<List<String>> sentences = Generics.newArrayList();
        for (String string : IOUtils.readLines(tokensFilename, "utf-8")) {
            String[] stringArray = string.split("\\|");
            sentences.add(Arrays.asList(stringArray));
        }
        Map<List<String>, Integer> phraseIds = Generics.newHashMap();
        for (String string : IOUtils.readLines(dictionaryFilename, "utf-8")) {
            String[] pieces = string.split("\\|");
            String[] sentence = pieces[0].split(" ");
            id = Integer.valueOf(pieces[1]);
            phraseIds.put(Arrays.asList(sentence), (Integer)id);
        }
        Map<Integer, Double> map = Generics.newHashMap();
        for (String line : IOUtils.readLines(sentimentFilename, "utf-8")) {
            if (line.startsWith("phrase")) continue;
            String[] pieces = line.split("\\|");
            id = Integer.valueOf(pieces[0]);
            Double score = Double.valueOf(pieces[1]);
            map.put((Integer)id, score);
        }
        boolean bl = false;
        PTBEscapingProcessor escaper = new PTBEscapingProcessor();
        ArrayList<Tree> trees = Generics.newArrayList();
        for (String line : IOUtils.readLines(parseFilename, "utf-8")) {
            void var13_20;
            String[] pieces = line.split("\\|");
            List<Integer> parentPointers = CollectionUtils.transformAsList(Arrays.asList(pieces), arg -> Integer.valueOf(arg) - 1);
            Tree tree = ReadSentimentDataset.convertTree(parentPointers, (List)sentences.get((int)var13_20), phraseIds, map, escaper);
            ++var13_20;
            trees.add(tree);
        }
        Map splits = Generics.newHashMap();
        splits.put(1, Generics.newArrayList());
        splits.put(2, Generics.newArrayList());
        splits.put(3, Generics.newArrayList());
        for (String line : IOUtils.readLines(splitFilename, "utf-8")) {
            if (line.startsWith("sentence_index")) continue;
            String[] pieces = line.split(",");
            Integer treeId = Integer.valueOf(pieces[0]) - 1;
            Integer fileId = Integer.valueOf(pieces[1]);
            ((List)splits.get(fileId)).add(treeId);
        }
        ReadSentimentDataset.writeTrees(trainFilename, trees, (List)splits.get(1));
        ReadSentimentDataset.writeTrees(testFilename, trees, (List)splits.get(2));
        ReadSentimentDataset.writeTrees(devFilename, trees, (List)splits.get(3));
    }

    static {
        if (tregexPatterns.length != tsurgeonPatterns.length) {
            throw new RuntimeException("Expected the same number of tregex and tsurgeon when initializing");
        }
    }
}

