/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.hcoref.docreader;

import edu.stanford.nlp.hcoref.CorefCoreAnnotations;
import edu.stanford.nlp.hcoref.CorefProperties;
import edu.stanford.nlp.hcoref.data.InputDoc;
import edu.stanford.nlp.hcoref.data.Mention;
import edu.stanford.nlp.hcoref.docreader.DocReader;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.IntCounter;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.ModCollinsHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure;
import edu.stanford.nlp.trees.international.pennchinese.ChineseSemanticHeadFinder;
import edu.stanford.nlp.util.AbstractIterator;
import edu.stanford.nlp.util.CollectionFactory;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Filters;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Set;
import java.util.Stack;
import java.util.logging.Logger;
import java.util.regex.Pattern;

public class CoNLLDocumentReader
implements DocReader {
    // Column indices into one whitespace-split CoNLL line.
    // A position of -1 is resolved by getField() to the last column of the row.
    private static final int FIELD_LAST = -1;
    private static final int FIELD_DOC_ID = 0;
    private static final int FIELD_PART_NO = 1;
    // Word number within the sentence; wordsToSentence parses it as an int.
    private static final int FIELD_WORD_NO = 2;
    private static final int FIELD_WORD = 3;
    private static final int FIELD_POS_TAG = 4;
    // Bracketed parse fragment in which '*' stands for the (POS word) leaf.
    private static final int FIELD_PARSE_BIT = 5;
    private static final int FIELD_SPEAKER_AUTHOR = 9;
    private static final int FIELD_NER_TAG = 10;
    // The coreference column is always the last one, hence the same value as FIELD_LAST.
    private static final int FIELD_COREF = -1;
    // NOTE(review): presumably the minimum number of columns a data line must have;
    // its use is not visible in this part of the file - confirm.
    private static final int FIELDS_MIN = 12;
    // Iterator over the documents of the file currently being read; null until the first read or after reset().
    private DocumentIterator docIterator;
    // Input files matched under the path given to the constructor.
    protected final List<File> fileList;
    // Index into fileList of the file currently being read.
    private int curFileIndex;
    private final Options options;
    public static final Logger logger = Logger.getLogger(CoNLLDocumentReader.class.getName());
    // Head finder handed to ChineseGrammaticalStructure when building dependencies for Chinese input.
    private static final HeadFinder chineseHeadFinder = new ChineseSemanticHeadFinder();

    /**
     * Creates a reader over the files found under {@code filepath}, using a
     * default-constructed {@link Options}.
     *
     * @param filepath file or directory to read CoNLL files from
     */
    public CoNLLDocumentReader(String filepath) {
        this(filepath, new Options());
    }

    public CoNLLDocumentReader(String filepath, Options options) {
        this.fileList = CoNLLDocumentReader.getFiles(filepath, options.filePattern);
        this.options = options;
        if (options.sortFiles) {
            Collections.sort(this.fileList);
        }
        this.curFileIndex = 0;
        logger.info("Reading " + this.fileList.size() + " CoNLL files from " + filepath);
    }

    /**
     * Collects, in sorted order, every file under {@code filepath} accepted
     * by {@code filter}.
     *
     * @param filepath root file or directory to walk recursively
     * @param filter   pattern handed to {@code IOUtils.iterFilesRecursive}
     * @return a new, naturally sorted list of the matching files
     */
    private static List<File> getFiles(String filepath, Pattern filter) {
        File root = new File(filepath);
        Iterator<File> matches = IOUtils.iterFilesRecursive(root, filter).iterator();
        ArrayList<File> collected = new ArrayList<File>();
        while (matches.hasNext()) {
            collected.add(matches.next());
        }
        Collections.sort(collected);
        return collected;
    }

    /**
     * Rewinds the reader to the first file, closing and discarding the
     * document iterator that is currently open, if any.
     */
    @Override
    public void reset() {
        curFileIndex = 0;
        if (docIterator == null) {
            return;
        }
        docIterator.close();
        docIterator = null;
    }

    /**
     * Returns the next document, moving on to the following file whenever
     * the current one has no documents left.
     *
     * @return the next document, or {@code null} once every file is exhausted
     * @throws RuntimeIOException if opening a file fails with an {@link IOException}
     */
    public CoNLLDocument getNextDocument() {
        try {
            if (curFileIndex >= fileList.size()) {
                return null;
            }
            File file = fileList.get(curFileIndex);
            if (docIterator == null) {
                docIterator = new DocumentIterator(file.getAbsolutePath(), options);
            }
            for (;;) {
                if (docIterator.hasNext()) {
                    CoNLLDocument document = docIterator.next();
                    Redwood.log("debug-docreader", "Reading document: " + document.getDocumentID() + " part: " + document.getPartNo());
                    return document;
                }
                // Current file is finished: report, close it, and advance to the next file.
                Redwood.log("debug-docreader", "Processed " + docIterator.docCnt + " documents in " + file.getAbsolutePath());
                docIterator.close();
                ++curFileIndex;
                if (curFileIndex >= fileList.size()) {
                    return null;
                }
                file = fileList.get(curFileIndex);
                docIterator = new DocumentIterator(file.getAbsolutePath(), options);
            }
        }
        catch (IOException ioFailure) {
            throw new RuntimeIOException(ioFailure);
        }
    }

    /**
     * Closes the currently open document iterator, ignoring any exception
     * raised while closing.
     */
    public void close() {
        DocumentIterator open = docIterator;
        IOUtils.closeIgnoringExceptions(open);
    }

    /**
     * Returns one column of a split CoNLL line.
     *
     * @param fields the columns of the line
     * @param pos    zero-based column index, or {@code -1} for the last column
     * @return the selected column value
     */
    private static String getField(String[] fields, int pos) {
        // -1 is the "last column" sentinel; every other value is a direct index.
        int column = (pos == -1) ? fields.length - 1 : pos;
        return fields[column];
    }

    /**
     * Joins one column of every row of a sentence with single spaces.
     *
     * @param sentWords the split lines of one sentence
     * @param pos       column index as understood by {@code getField} ({@code -1} = last)
     * @return the space-separated column values
     */
    private static String concatField(List<String[]> sentWords, int pos) {
        StringBuilder joined = new StringBuilder();
        Iterator<String[]> rows = sentWords.iterator();
        while (rows.hasNext()) {
            String[] row = rows.next();
            // The separator is keyed on the buffer being non-empty, not on the row index.
            if (joined.length() != 0) {
                joined.append(' ');
            }
            String value = CoNLLDocumentReader.getField(row, pos);
            joined.append(value);
        }
        return joined.toString();
    }

    /**
     * Prints the command-line usage of this class's {@code main} to standard error.
     */
    public static void usage() {
        // The message previously named a class from another package
        // (dcoref.CoNLL2011DocumentReader); it must name the class that is actually run.
        System.err.println("java edu.stanford.nlp.hcoref.docreader.CoNLLDocumentReader [-ext <extension to match>] -i <inputpath> -o <outputfile>");
    }

    /**
     * Finds how far the mention of cluster {@code corefG} that covers token
     * {@code index} extends to the right.
     *
     * @param index        token position the mention starts at
     * @param corefG       coreference cluster id to follow
     * @param sentenceAnno tokens of the sentence
     * @return pair of {@code index} and the last consecutive following position
     *         whose {@code |}-separated coref annotation lists {@code corefG}
     */
    public static Pair<Integer, Integer> getMention(Integer index, String corefG, List<CoreLabel> sentenceAnno) {
        Integer lastCovered = index;
        int position = -1;
        for (CoreLabel token : sentenceAnno) {
            ++position;
            if (position <= index) {
                // Only tokens strictly after the start position can extend the mention.
                continue;
            }
            String clusters = token.get(CorefCoreAnnotations.CorefAnnotation.class);
            if (clusters == null) {
                break;
            }
            List<String> clusterIds = Arrays.asList(clusters.split("\\|"));
            if (!clusterIds.contains(corefG)) {
                break;
            }
            lastCovered = position;
        }
        return Pair.makePair(index, lastCovered);
    }

    /**
     * Tells whether an already recorded span of cluster {@code corefG} starts
     * before {@code mention} and ends on the same token.
     *
     * @param sentenceInfo spans recorded so far, mapped to their cluster id
     * @param mention      candidate span (start, end)
     * @param corefG       cluster id of the candidate
     * @return {@code true} if such an earlier-starting span exists
     */
    public static boolean include(Map<Pair<Integer, Integer>, String> sentenceInfo, Pair<Integer, Integer> mention, String corefG) {
        for (Map.Entry<Pair<Integer, Integer>, String> recorded : sentenceInfo.entrySet()) {
            String cluster = recorded.getValue();
            if (cluster == null || !cluster.equals(corefG)) {
                continue;
            }
            Pair<Integer, Integer> span = recorded.getKey();
            boolean startsEarlier = span.first < mention.first;
            if (startsEarlier && span.second.equals(mention.second)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Writes one sentence as tab-separated lines of
     * {@code word, POS tag, NER tag, MENTION|O}, followed by an empty line.
     *
     * <p>A token is marked {@code MENTION} when its word equals the head word of
     * a mention recorded so far in the sentence. When the marked token is the
     * possessive {@code 's}, the mark is moved to the preceding token.
     *
     * @param pw       destination writer
     * @param sentence sentence carrying token and parse-tree annotations; the
     *                 tree annotation is dereferenced unconditionally
     * @param chainmap coreference chains of the document; not consulted by this
     *                 method, kept for interface compatibility
     */
    public static void writeTabSep(PrintWriter pw, CoreMap sentence, CollectionValuedMap<String, CoreMap> chainmap) {
        HeadFinder headFinder = new ModCollinsHeadFinder();
        List<CoreLabel> sentenceAnno = sentence.get(CoreAnnotations.TokensAnnotation.class);
        Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        Map<Pair<Integer, Integer>, String> sentenceInfo = Generics.newHashMap();
        Set<Tree> sentenceSubTrees = sentenceTree.subTrees();
        sentenceTree.setSpans();
        // Index every constituent by its (first token, last token) span.
        Map<Pair<Integer, Integer>, Tree> treeSpanMap = Generics.newHashMap();
        for (Tree ctree : sentenceSubTrees) {
            IntPair span = ctree.getSpan();
            if (span == null) {
                continue;
            }
            treeSpanMap.put(Pair.makePair(span.getSource(), span.getTarget()), ctree);
        }
        String[][] finalSentence = new String[sentenceAnno.size()][];
        Map<Pair<Integer, Integer>, String> allHeads = Generics.newHashMap();
        int index = -1;
        for (CoreLabel newAnno : sentenceAnno) {
            ++index;
            String word = newAnno.word();
            String coref = newAnno.get(CorefCoreAnnotations.CorefAnnotation.class);
            finalSentence[index] = new String[]{word, newAnno.tag(), newAnno.ner(), coref};
            if (coref == null) {
                sentenceInfo.put(Pair.makePair(index, index), coref);
                finalSentence[index][3] = "O";
                continue;
            }
            for (String corefG : coref.split("\\|")) {
                Pair<Integer, Integer> mention = CoNLLDocumentReader.getMention(index, corefG, sentenceAnno);
                if (CoNLLDocumentReader.include(sentenceInfo, mention, corefG)) {
                    // A span of this cluster that started earlier and ends on the same token is already recorded.
                    continue;
                }
                sentenceInfo.put(mention, corefG);
                Tree mentionTree = treeSpanMap.get(mention);
                String head = null;
                if (mentionTree != null) {
                    head = mentionTree.headTerminal(headFinder).nodeString();
                } else if (mention.first.equals(mention.second)) {
                    // Single-token mention with no matching constituent: the token is its own head.
                    head = word;
                }
                allHeads.put(mention, head);
            }
            finalSentence[index][3] = allHeads.values().contains(word) ? "MENTION" : "O";
        }
        for (int i = 0; i < finalSentence.length; ++i) {
            String[] wordInfo = finalSentence[i];
            if (i < finalSentence.length - 1) {
                String[] nextWordInfo = finalSentence[i + 1];
                // Literal-first equals so that a token without a word cannot raise an NPE.
                if ("MENTION".equals(nextWordInfo[3]) && "'s".equals(nextWordInfo[0])) {
                    wordInfo[3] = "MENTION";
                    nextWordInfo[3] = "O";
                }
            }
            pw.println(wordInfo[0] + "\t" + wordInfo[1] + "\t" + wordInfo[2] + "\t" + wordInfo[3]);
        }
        pw.println("");
    }

    /**
     * Command-line entry point: reads every CoNLL document under {@code -i},
     * writes each sentence in tab-separated form to {@code -o}, and prints
     * document/sentence/token totals and corpus statistics to standard output.
     *
     * <p>Recognised arguments: {@code -i <inputpath>}, {@code -o <outputfile>},
     * and optionally {@code -ext <extension>}. If {@code -i} or {@code -o} is
     * missing, the usage text is printed and the JVM exits with status -1.
     *
     * @param args command-line arguments
     * @throws IOException if the output file cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        Properties props = StringUtils.argsToProperties(args);
        boolean debug = CorefProperties.debug(props);
        String filepath = props.getProperty("i");
        String outfile = props.getProperty("o");
        if (filepath == null || outfile == null) {
            CoNLLDocumentReader.usage();
            System.exit(-1);
        }
        int docCnt = 0;
        int sentCnt = 0;
        int tokenCnt = 0;
        CorpusStats corpusStats = new CorpusStats();
        PrintWriter fout = new PrintWriter(outfile);
        try {
            logger.info("Writing to " + outfile);
            String ext = props.getProperty("ext");
            Options options = ext != null ? new Options(".*" + ext + "$") : new Options();
            options.annotateTreeCoref = true;
            options.annotateTreeNer = true;
            CoNLLDocumentReader reader = new CoNLLDocumentReader(filepath, options);
            try {
                CoNLLDocument doc;
                while ((doc = reader.getNextDocument()) != null) {
                    corpusStats.process(doc);
                    ++docCnt;
                    Annotation anno = doc.getAnnotation();
                    if (debug) {
                        System.out.println("Document " + docCnt + ": " + anno.get(CoreAnnotations.DocIDAnnotation.class));
                    }
                    List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
                    for (CoreMap sentence : sentences) {
                        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                        if (debug) {
                            System.out.println("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                            System.out.println("Sentence Tokens: " + StringUtils.join(tokens, ","));
                        }
                        CoNLLDocumentReader.writeTabSep(fout, sentence, doc.corefChainMap);
                        ++sentCnt;
                        tokenCnt += tokens.size();
                    }
                    if (!debug) {
                        continue;
                    }
                    for (CoreMap ner : doc.nerChunks) {
                        System.out.println("NER Chunk: " + ner);
                    }
                    for (String id : doc.corefChainMap.keySet()) {
                        System.out.println("Coref: " + id + " = " + StringUtils.join(doc.corefChainMap.get(id), ";"));
                    }
                }
            } finally {
                // Release the input file even when processing a document fails.
                reader.close();
            }
            // PrintWriter never throws on write failure; surface it explicitly.
            if (fout.checkError()) {
                throw new IOException("Error writing to " + outfile);
            }
        } finally {
            fout.close();
        }
        System.out.println("Total document count: " + docCnt);
        System.out.println("Total sentence count: " + sentCnt);
        System.out.println("Total token count: " + tokenCnt);
        System.out.println(corpusStats);
    }

    /**
     * Reads the next CoNLL document and wraps it as an {@link InputDoc},
     * after attaching dependency graphs to its sentences and extracting its
     * gold mentions.
     *
     * @return the next input document, or {@code null} when none is left
     */
    @Override
    public InputDoc nextDoc() {
        CoNLLDocument source = getNextDocument();
        if (source != null) {
            Annotation annotation = source.getAnnotation();
            setDependencyTree(annotation);
            List<List<Mention>> goldMentions = extractGoldMentions(source);
            Map<String, String> info = makeDocInfo(source);
            return new InputDoc(annotation, info, goldMentions, source);
        }
        return null;
    }

    /**
     * Builds the metadata map for a document.
     *
     * @param conllDoc document whose id, part, id-part and file name are copied
     * @return a new map with keys {@code DOC_ID}, {@code DOC_PART},
     *         {@code DOC_ID_PART} and {@code DOC_FILE}
     */
    private Map<String, String> makeDocInfo(CoNLLDocument conllDoc) {
        Map<String, String> info = Generics.newHashMap();
        String id = conllDoc.documentID;
        String part = conllDoc.partNo;
        String idPart = conllDoc.documentIdPart;
        String file = conllDoc.filename;
        info.put("DOC_ID", id);
        info.put("DOC_PART", part);
        info.put("DOC_ID_PART", idPart);
        info.put("DOC_FILE", file);
        return info;
    }

    /**
     * Derives basic and collapsed dependency graphs from the parse tree of
     * every sentence and stores them on the sentence. Sentences without a
     * tree annotation are left untouched.
     *
     * @param anno document annotation whose sentences are updated in place
     */
    private void setDependencyTree(Annotation anno) {
        List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
        // Compare by value: an equal Locale that is not the Locale.CHINESE constant
        // itself would fail an identity (==) check and be treated as non-Chinese.
        boolean chinese = Locale.CHINESE.equals(this.options.lang);
        for (CoreMap sentence : sentences) {
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (tree == null) {
                continue;
            }
            SemanticGraph deps;
            SemanticGraph basicDeps;
            if (chinese) {
                final boolean threadSafe = true;
                deps = SemanticGraphFactory.makeFromTree((GrammaticalStructure)new ChineseGrammaticalStructure(tree, Filters.acceptFilter(), chineseHeadFinder), SemanticGraphFactory.Mode.COLLAPSED, GrammaticalStructure.Extras.NONE, threadSafe, null);
                basicDeps = SemanticGraphFactory.makeFromTree((GrammaticalStructure)new ChineseGrammaticalStructure(tree, Filters.acceptFilter(), chineseHeadFinder), SemanticGraphFactory.Mode.BASIC, GrammaticalStructure.Extras.NONE, threadSafe, null);
            } else {
                deps = SemanticGraphFactory.makeFromTree(tree, true);
                basicDeps = SemanticGraphFactory.makeFromTree(tree, false);
            }
            sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps);
            sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
        }
    }

    /**
     * Converts the document's coreference chains into gold {@link Mention}s,
     * grouped by sentence.
     *
     * @param conllDoc document whose chain keys must be parseable as integers
     * @return one list per sentence, in sentence order; each mention carries
     *         sentence-relative start/end token offsets and its gold cluster id
     */
    public List<List<Mention>> extractGoldMentions(CoNLLDocument conllDoc) {
        List<CoreMap> sentences = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
        ArrayList<List<Mention>> goldBySentence = new ArrayList<List<Mention>>();
        CollectionValuedMap<String, CoreMap> chains = conllDoc.getCorefChainMap();
        for (CoreMap ignored : sentences) {
            goldBySentence.add(new ArrayList<Mention>());
        }
        for (String clusterKey : chains.keySet()) {
            int clusterId = Integer.parseInt(clusterKey);
            for (CoreMap span : chains.get(clusterKey)) {
                Mention gold = new Mention();
                gold.goldCorefClusterID = clusterId;
                int sentIndex = span.get(CoreAnnotations.SentenceIndexAnnotation.class);
                CoreMap sent = sentences.get(sentIndex);
                // Mention offsets are document-wide; subtract the sentence start to make them sentence-relative.
                int spanBegin = span.get(CoreAnnotations.TokenBeginAnnotation.class);
                gold.startIndex = spanBegin - sent.get(CoreAnnotations.TokenBeginAnnotation.class);
                int spanEnd = span.get(CoreAnnotations.TokenEndAnnotation.class);
                gold.endIndex = spanEnd - sent.get(CoreAnnotations.TokenBeginAnnotation.class);
                gold.originalSpan = span.get(CoreAnnotations.TokensAnnotation.class);
                goldBySentence.get(sentIndex).add(gold);
            }
        }
        return goldBySentence;
    }

    /**
     * Accumulates statistics over the coreference mentions of the documents
     * passed to {@link #process(CoNLLDocument)}: parse-tree labels of mention
     * nodes, mention token lengths, how often a mention coincides with a tree
     * span, and how many mentions are (nested) named entities.
     * {@link #toString()} renders the totals as a tab-separated report.
     */
    public static class CorpusStats {
        // Label of the tree node attached to each mention.
        IntCounter<String> mentionTreeLabelCounter = new IntCounter<String>();
        // Same, but with a preterminal node replaced by its parent.
        IntCounter<String> mentionTreeNonPretermLabelCounter = new IntCounter<String>();
        // Mention node label, counted when the (parent-substituted) node's span does not match the mention.
        IntCounter<String> mentionTreePretermNonPretermNoMatchLabelCounter = new IntCounter<String>();
        // Parent-substituted label when its span matches the mention, otherwise the mention node label.
        IntCounter<String> mentionTreeMixedLabelCounter = new IntCounter<String>();
        IntCounter<Integer> mentionTokenLengthCounter = new IntCounter<Integer>();
        IntCounter<Integer> nerMentionTokenLengthCounter = new IntCounter<Integer>();
        int mentionExactTreeSpan = 0;
        int nonPretermSpanMatches = 0;
        int totalMentions = 0;
        int nestedNerMentions = 0;
        int nerMentions = 0;

        /**
         * Adds every mention of every coreference chain in {@code doc} to the
         * running statistics. Each mention must carry a tree annotation.
         *
         * @param doc document to account for
         */
        public void process(CoNLLDocument doc) {
            List<CoreMap> sentences = doc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
            for (String id : doc.corefChainMap.keySet()) {
                for (CoreMap m : doc.corefChainMap.get(id)) {
                    CoreMap sent = sentences.get(m.get(CoreAnnotations.SentenceIndexAnnotation.class));
                    Tree root = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
                    Tree t = m.get(TreeCoreAnnotations.TreeAnnotation.class);
                    Tree npt = t;
                    Tree npt2 = t;
                    if (npt.isPreTerminal()) {
                        npt = npt.parent(root);
                    }
                    int sentTokenStart = sent.get(CoreAnnotations.TokenBeginAnnotation.class);
                    int tokenStart = m.get(CoreAnnotations.TokenBeginAnnotation.class) - sentTokenStart;
                    int tokenEnd = m.get(CoreAnnotations.TokenEndAnnotation.class) - sentTokenStart;
                    int length = tokenEnd - tokenStart;
                    this.mentionTokenLengthCounter.incrementCount(length);
                    // Tree spans are inclusive on both ends, mention token ends are exclusive.
                    IntPair span = t.getSpan();
                    if (span != null) {
                        if (span.getSource() == tokenStart && span.getTarget() == tokenEnd - 1) {
                            ++this.mentionExactTreeSpan;
                        } else {
                            logger.info("Tree span is " + span + ", tree node is " + t);
                            logger.info("Mention span is " + tokenStart + " " + (tokenEnd - 1) + ", mention is " + m);
                        }
                    } else {
                        logger.warning("No span for " + t);
                    }
                    IntPair nptSpan = npt.getSpan();
                    // A missing span is treated as a mismatch instead of failing with an NPE,
                    // mirroring the tolerance shown for the mention node's own span above.
                    if (nptSpan != null && nptSpan.getSource() == tokenStart && nptSpan.getTarget() == tokenEnd - 1) {
                        ++this.nonPretermSpanMatches;
                        npt2 = npt;
                    } else {
                        this.mentionTreePretermNonPretermNoMatchLabelCounter.incrementCount(t.label().value());
                        // Report the span of the node being printed (npt), not the span of t.
                        logger.info("NPT: Tree span is " + nptSpan + ", tree node is " + npt);
                        logger.info("NPT: Mention span is " + tokenStart + " " + (tokenEnd - 1) + ", mention is " + m);
                        Label mismatchLabel = t.label();
                        if (mismatchLabel instanceof CoreLabel) {
                            CoreMap mention = (CoreMap)((CoreLabel)mismatchLabel).get(CorefMentionAnnotation.class);
                            String corefClusterId = mention.get(CorefCoreAnnotations.CorefAnnotation.class);
                            for (CoreMap m2 : doc.corefChainMap.get(corefClusterId)) {
                                logger.info("NPT: Clustered mention " + m2.get(CoreAnnotations.TextAnnotation.class));
                            }
                        }
                    }
                    ++this.totalMentions;
                    this.mentionTreeLabelCounter.incrementCount(t.label().value());
                    this.mentionTreeNonPretermLabelCounter.incrementCount(npt.label().value());
                    this.mentionTreeMixedLabelCounter.incrementCount(npt2.label().value());
                    Label tlabel = t.label();
                    if (!(tlabel instanceof CoreLabel) || !((CoreLabel)tlabel).containsKey(NamedEntityAnnotation.class)) {
                        continue;
                    }
                    ++this.nerMentions;
                    this.nerMentionTokenLengthCounter.incrementCount(length);
                    // Walk up the tree; the first ancestor that is itself an NER chunk makes this a nested NER mention.
                    for (Tree parent = t.parent(root); parent != null; parent = parent.parent(root)) {
                        Label plabel = parent.label();
                        if (!(plabel instanceof CoreLabel) || !((CoreLabel)plabel).containsKey(NamedEntityAnnotation.class)) {
                            continue;
                        }
                        logger.info("NER Mention: " + m);
                        CoreMap parentNerChunk = (CoreMap)((CoreLabel)plabel).get(NamedEntityAnnotation.class);
                        logger.info("Nested inside NER Mention: " + parentNerChunk);
                        logger.info("Nested inside NER Mention parent node: " + parent);
                        ++this.nestedNerMentions;
                        break;
                    }
                }
            }
        }

        /**
         * Appends {@code label<TAB>num/den as a double<TAB>(num/den)} to {@code sb}.
         * The division is done in floating point, so a zero denominator yields
         * {@code NaN} or {@code Infinity} rather than an exception.
         */
        private static void appendFrac(StringBuilder sb, String label, int num, int den) {
            double frac = (double)num / (double)den;
            sb.append(label).append("\t").append(frac).append("\t(").append(num).append("/").append(den).append(")");
        }

        /**
         * Appends a heading line followed by one fraction line per key of
         * {@code counts}, in the order returned by {@code Counters.toSortedList}.
         */
        private static <E> void appendIntCountStats(StringBuilder sb, String label, IntCounter<E> counts) {
            sb.append(label).append("\n");
            List<E> sortedKeys = Counters.toSortedList(counts);
            int total = counts.totalIntCount();
            for (E key : sortedKeys) {
                int count = counts.getIntCount(key);
                CorpusStats.appendFrac(sb, key.toString(), count, total);
                sb.append("\n");
            }
        }

        /**
         * Renders all collected counters and ratios as a multi-line report.
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            CorpusStats.appendIntCountStats(sb, "Mention Tree Labels (no preterminals)", this.mentionTreeNonPretermLabelCounter);
            sb.append("\n");
            CorpusStats.appendIntCountStats(sb, "Mention Tree Labels (with preterminals)", this.mentionTreeLabelCounter);
            sb.append("\n");
            CorpusStats.appendIntCountStats(sb, "Mention Tree Labels (preterminals with parent span not match)", this.mentionTreePretermNonPretermNoMatchLabelCounter);
            sb.append("\n");
            CorpusStats.appendIntCountStats(sb, "Mention Tree Labels (mixed)", this.mentionTreeMixedLabelCounter);
            sb.append("\n");
            CorpusStats.appendIntCountStats(sb, "Mention Lengths", this.mentionTokenLengthCounter);
            sb.append("\n");
            CorpusStats.appendFrac(sb, "Mention Exact Non Preterm Tree Span", this.nonPretermSpanMatches, this.totalMentions);
            sb.append("\n");
            CorpusStats.appendFrac(sb, "Mention Exact Tree Span", this.mentionExactTreeSpan, this.totalMentions);
            sb.append("\n");
            CorpusStats.appendFrac(sb, "NER", this.nerMentions, this.totalMentions);
            sb.append("\n");
            CorpusStats.appendFrac(sb, "Nested NER", this.nestedNerMentions, this.totalMentions);
            sb.append("\n");
            CorpusStats.appendIntCountStats(sb, "NER Mention Lengths", this.nerMentionTokenLengthCounter);
            return sb.toString();
        }
    }

    private static class DocumentIterator
    extends AbstractIterator<CoNLLDocument>
    implements Closeable {
        // Matches runs of whitespace.
        // NOTE(review): presumably used to split a line into columns; the use is outside this view - confirm.
        private static final Pattern delimiterPattern = Pattern.compile("\\s+");
        // Tree reader factory (constructed with a null normalizer) used by wordsToParse to build the parse tree.
        private static final LabeledScoredTreeReaderFactory treeReaderFactory = new LabeledScoredTreeReaderFactory((TreeNormalizer)null);
        private final Options options;
        String filename;
        BufferedReader br;
        // Document read ahead of the caller; null once the file has no more documents.
        CoNLLDocument nextDoc;
        int lineCnt = 0;
        // Reported by the enclosing reader as the number of documents processed in this file.
        int docCnt = 0;
        // Used by getLabelledSpans to strip '*' characters from span labels.
        private static final Pattern starPattern = Pattern.compile("\\*");
        // Value of the NER column for a token that neither opens nor closes a span.
        private static final String ASTERISK = "*";
        // Value of the coreference column (and of the speaker column) when nothing is annotated.
        private static final String HYPHEN = "-";
        // NOTE(review): presumably the prefix of the line that opens a document; the use is outside this view - confirm.
        private static final String docStart = "#begin document ";
        private static final int docStartLength = "#begin document ".length();

        public DocumentIterator(String filename, Options options) throws IOException {
            this.options = options;
            this.filename = filename;
            this.br = IOUtils.getBufferedFileReader(filename);
            this.nextDoc = this.readNextDocument();
        }

        /**
         * @return {@code true} while a read-ahead document is available
         */
        @Override
        public boolean hasNext() {
            boolean exhausted = (nextDoc == null);
            return !exhausted;
        }

        /**
         * Returns the read-ahead document and reads the following one.
         *
         * @return the next document of the file
         * @throws NoSuchElementException if no document is left
         */
        @Override
        public CoNLLDocument next() {
            CoNLLDocument current = nextDoc;
            if (current != null) {
                nextDoc = readNextDocument();
                return current;
            }
            throw new NoSuchElementException("DocumentIterator exhausted.");
        }

        /**
         * Rebuilds the bracketed parse of a sentence from the per-token parse
         * bits and reads it into a {@link Tree}.
         *
         * <p>In each parse bit, {@code NOPARSE} is replaced by {@code X} and the
         * first {@code *} is replaced by {@code (POS word)}. A parse bit with
         * more than one {@code *} is logged as a warning.
         *
         * @param sentWords the split lines of one sentence
         * @return the parse tree of the sentence
         * @throws StringIndexOutOfBoundsException if a parse bit contains no {@code *}
         */
        private static Tree wordsToParse(List<String[]> sentWords) {
            StringBuilder sb = new StringBuilder();
            for (String[] fields : sentWords) {
                if (sb.length() > 0) {
                    sb.append(' ');
                }
                String str = fields[FIELD_PARSE_BIT].replace("NOPARSE", "X");
                String tagword = "(" + fields[FIELD_POS_TAG] + " " + fields[FIELD_WORD] + ")";
                int si = str.indexOf('*');
                if (si < 0) {
                    // Same exception type that substring(0, -1) raised before, but now
                    // saying which token is malformed instead of "index out of range: -1".
                    throw new StringIndexOutOfBoundsException("Parse bit without *: " + str + " for word " + fields[FIELD_WORD]);
                }
                sb.append(str, 0, si);
                sb.append(tagword);
                sb.append(str, si + 1, str.length());
                if (str.indexOf('*', si + 1) >= 0) {
                    logger.warning(" Parse bit with multiple *: " + str);
                }
            }
            String parseStr = sb.toString();
            return Tree.valueOf(parseStr, treeReaderFactory);
        }

        /**
         * Extracts the coreference spans of a sentence from its last column,
         * verifying that each closing label matches an open span.
         *
         * @param sentWords the split lines of one sentence
         * @return (start token, end token, cluster label) triples
         */
        private static List<Triple<Integer, Integer, String>> getCorefSpans(List<String[]> sentWords) {
            final boolean checkEndLabel = true;
            return DocumentIterator.getLabelledSpans(sentWords, FIELD_COREF, HYPHEN, checkEndLabel);
        }

        /**
         * Extracts the named-entity spans of a sentence from its NER column,
         * without checking labels on closing brackets.
         *
         * @param sentWords the split lines of one sentence
         * @return (start token, end token, entity label) triples
         */
        private static List<Triple<Integer, Integer, String>> getNerSpans(List<String[]> sentWords) {
            final boolean checkEndLabel = false;
            return DocumentIterator.getLabelledSpans(sentWords, FIELD_NER_TAG, ASTERISK, checkEndLabel);
        }

        /**
         * Extracts bracketed, labelled spans from one column of a sentence.
         *
         * <p>Each column value is scanned character by character. A {@code (}
         * opens a span whose label is the text up to the next {@code (},
         * {@code )} or {@code |} (or the end of the value); a {@code )} closes
         * an open span at the current token.
         *
         * @param sentWords     the split lines of one sentence
         * @param fieldIndex    column to read, as understood by {@code getField} ({@code -1} = last)
         * @param defaultMarker column value that means "nothing here"; such tokens are skipped.
         *                      When it is {@code "*"}, {@code *} characters are stripped from labels
         * @param checkEndLabel if {@code true}, the text preceding a {@code )} is taken as the
         *                      label of the span to close, and the open spans are searched for it
         * @return triples of (start token index, end token index, label), in the order spans were closed
         * @throws RuntimeException if a closing label matches no open span, or spans remain open
         *                          at the end of the sentence
         */
        private static List<Triple<Integer, Integer, String>> getLabelledSpans(List<String[]> sentWords, int fieldIndex, String defaultMarker, boolean checkEndLabel) {
            ArrayList<Triple<Integer, Integer, String>> spans = new ArrayList<Triple<Integer, Integer, String>>();
            // Spans that have been opened but not yet closed; the end index is -1 until closed.
            Stack<Triple<Integer, Integer, String>> openSpans = new Stack<Triple<Integer, Integer, String>>();
            boolean removeStar = ASTERISK.equals(defaultMarker);
            for (int wordPos = 0; wordPos < sentWords.size(); ++wordPos) {
                String[] fields = sentWords.get(wordPos);
                String val = CoNLLDocumentReader.getField(fields, fieldIndex);
                if (defaultMarker.equals(val)) continue;
                // Position of a '(' whose label has not been terminated yet, or -1.
                int openParenIndex = -1;
                // Position of the most recent '(', ')' or '|' seen before the current character.
                int lastDelimiterIndex = -1;
                for (int j = 0; j < val.length(); ++j) {
                    char c = val.charAt(j);
                    boolean isDelimiter = false;
                    if (c == '(' || c == ')' || c == '|') {
                        if (openParenIndex >= 0) {
                            // A delimiter ends the label of the pending '(' : register the open span.
                            String s = val.substring(openParenIndex + 1, j);
                            if (removeStar) {
                                s = starPattern.matcher(s).replaceAll("");
                            }
                            openSpans.push(new Triple<Integer, Integer, String>(wordPos, -1, s));
                            openParenIndex = -1;
                        }
                        isDelimiter = true;
                    }
                    if (c == '(') {
                        openParenIndex = j;
                    } else if (c == ')') {
                        String s;
                        // By default the most recently opened span is the one being closed.
                        Triple t = (Triple)openSpans.pop();
                        if (checkEndLabel && !(s = val.substring(lastDelimiterIndex + 1, j)).equals(t.third())) {
                            // The closing label names another span: dig through the stack for it,
                            // setting aside the spans popped on the way.
                            Stack<Triple> saved = new Stack<Triple>();
                            while (!s.equals(t.third())) {
                                saved.push(t);
                                if (openSpans.isEmpty()) {
                                    throw new RuntimeException("Cannot find matching labelled span for " + s);
                                }
                                t = (Triple)openSpans.pop();
                            }
                            // Put the set-aside spans back in their original relative order.
                            while (!saved.isEmpty()) {
                                openSpans.push((Triple<Integer, Integer, String>)saved.pop());
                            }
                            assert (s.equals(t.third()));
                        }
                        t.setSecond(wordPos);
                        spans.add(t);
                    }
                    // Updated only after ')' handling, so the closing label above is measured
                    // from the previous delimiter rather than from this one.
                    if (!isDelimiter) continue;
                    lastDelimiterIndex = j;
                }
                if (openParenIndex < 0) continue;
                // The value ended while a label was still being read: it runs to the end of the value.
                String s = val.substring(openParenIndex + 1, val.length());
                if (removeStar) {
                    s = starPattern.matcher(s).replaceAll("");
                }
                openSpans.push(new Triple<Integer, Integer, String>(wordPos, -1, s));
            }
            if (openSpans.size() != 0) {
                throw new RuntimeException("Error extracting labelled spans for column " + fieldIndex + ": " + CoNLLDocumentReader.concatField(sentWords, fieldIndex));
            }
            return spans;
        }

        /**
         * Builds the sentence annotation for the CoNLL rows of a single sentence.
         *
         * <p>The sentence text is the concatenation of column 3 of every row, the parse
         * tree comes from {@code wordsToParse}, and the token list is made of the labels
         * attached to the tree's leaves. Depending on {@code options}, tokens are further
         * annotated with speaker, part-of-speech, named-entity and coreference labels.
         *
         * @param sentWords one {@code String[]} of column values per token of the sentence
         * @return the sentence annotation carrying the tree and the token list
         */
        private CoreMap wordsToSentence(List<String[]> sentWords) {
            String sentText = CoNLLDocumentReader.concatField(sentWords, 3);
            Annotation sentence = new Annotation(sentText);
            Tree tree = DocumentIterator.wordsToParse(sentWords);
            sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
            List<Tree> leaves = tree.getLeaves();
            assert (leaves.size() == sentWords.size());
            List<CoreLabel> tokens = new ArrayList<CoreLabel>(leaves.size());
            sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);

            // Tokens are the leaf labels themselves, so later token annotations are
            // visible through the tree as well.
            for (int i = 0; i < sentWords.size(); ++i) {
                String[] fields = sentWords.get(i);
                int wordPos = Integer.parseInt(fields[2]);
                assert (wordPos == i);
                CoreLabel token = (CoreLabel) leaves.get(i).label();
                tokens.add(token);
                if (this.options.annotateTokenSpeaker) {
                    String speaker = fields[9].replace("_", " ");
                    // A value equal to HYPHEN is skipped: no speaker annotation is set.
                    if (!HYPHEN.equals(speaker)) {
                        token.set(CoreAnnotations.SpeakerAnnotation.class, speaker);
                    }
                }
            }

            if (this.options.annotateTokenPos) {
                for (Tree leaf : leaves) {
                    CoreLabel token = (CoreLabel) leaf.label();
                    // The POS tag is the value of the leaf's parent node.
                    token.set(CoreAnnotations.PartOfSpeechAnnotation.class, leaf.parent(tree).value());
                }
            }

            if (this.options.annotateTokenNer) {
                List<Triple<Integer, Integer, String>> nerSpans = DocumentIterator.getNerSpans(sentWords);
                for (Triple<Integer, Integer, String> nerSpan : nerSpans) {
                    int startToken = nerSpan.first();
                    int endToken = nerSpan.second();
                    String label = nerSpan.third();
                    // Span bounds are inclusive on both ends.
                    for (int i = startToken; i <= endToken; ++i) {
                        CoreLabel token = (CoreLabel) leaves.get(i).label();
                        String oldLabel = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                        if (oldLabel != null) {
                            logger.warning("Replacing old named entity tag " + oldLabel + " with " + label);
                        }
                        token.set(CoreAnnotations.NamedEntityTagAnnotation.class, label);
                    }
                }
                // Every token not covered by a span gets the background tag.
                for (CoreLabel token : tokens) {
                    if (!token.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class)) {
                        token.set(CoreAnnotations.NamedEntityTagAnnotation.class, this.options.backgroundNerTag);
                    }
                }
            }

            if (this.options.annotateTokenCoref) {
                List<Triple<Integer, Integer, String>> corefSpans = DocumentIterator.getCorefSpans(sentWords);
                for (Triple<Integer, Integer, String> corefSpan : corefSpans) {
                    int startToken = corefSpan.first();
                    int endToken = corefSpan.second();
                    String label = corefSpan.third();
                    for (int i = startToken; i <= endToken; ++i) {
                        CoreLabel token = (CoreLabel) leaves.get(i).label();
                        String curLabel = label;
                        if (this.options.useCorefBIOESEncoding) {
                            String prefix;
                            if (startToken == endToken) {
                                prefix = "S-";
                            } else if (i == startToken) {
                                prefix = "B-";
                            } else if (i == endToken) {
                                prefix = "E-";
                            } else {
                                prefix = "I-";
                            }
                            curLabel = prefix + label;
                        }
                        // Overlapping mentions accumulate on a token, separated by '|'.
                        String oldLabel = token.get(CorefCoreAnnotations.CorefAnnotation.class);
                        if (oldLabel != null) {
                            curLabel = oldLabel + "|" + curLabel;
                        }
                        token.set(CorefCoreAnnotations.CorefAnnotation.class, curLabel);
                    }
                }
            }
            return sentence;
        }

        /**
         * Assembles a document annotation from already-built sentence annotations.
         *
         * <p>Sets the document id, the sentence list and the flat token list on the
         * document. Each sentence receives its token begin/end offsets within the
         * document and its sentence index. Tokens receive character offsets computed
         * from the length of their text plus one separator position after every token;
         * each sentence then takes its begin/end character offsets from its first and
         * last token.
         *
         * @param documentID value stored under {@code DocIDAnnotation}
         * @param sentences sentence annotations, each carrying a {@code TokensAnnotation};
         *        these objects are modified in place
         * @return the new document annotation (created with {@code null} text)
         */
        public static Annotation sentencesToDocument(String documentID, List<CoreMap> sentences) {
            // Kept as a typed variable so the String constructor is selected for the null text.
            String docText = null;
            Annotation document = new Annotation(docText);
            document.set(CoreAnnotations.DocIDAnnotation.class, documentID);
            document.set(CoreAnnotations.SentencesAnnotation.class, sentences);

            // Collect all tokens and record per-sentence token ranges and indices.
            List<CoreLabel> docTokens = new ArrayList<CoreLabel>();
            int sentenceIndex = 0;
            int tokenBegin = 0;
            for (CoreMap sentenceAnnotation : sentences) {
                List<CoreLabel> sentenceTokens = sentenceAnnotation.get(CoreAnnotations.TokensAnnotation.class);
                docTokens.addAll(sentenceTokens);
                int tokenEnd = tokenBegin + sentenceTokens.size();
                sentenceAnnotation.set(CoreAnnotations.TokenBeginAnnotation.class, tokenBegin);
                sentenceAnnotation.set(CoreAnnotations.TokenEndAnnotation.class, tokenEnd);
                sentenceAnnotation.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
                ++sentenceIndex;
                tokenBegin = tokenEnd;
            }
            document.set(CoreAnnotations.TokensAnnotation.class, docTokens);

            // Character offsets: end = begin + text length, then skip one position.
            int offset = 0;
            for (CoreLabel token : docTokens) {
                String tokenText = token.get(CoreAnnotations.TextAnnotation.class);
                token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
                offset += tokenText.length();
                token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
                ++offset;
            }

            // A sentence spans from its first token's begin to its last token's end.
            for (CoreMap sentenceAnnotation : sentences) {
                List<CoreLabel> sentenceTokens = sentenceAnnotation.get(CoreAnnotations.TokensAnnotation.class);
                CoreLabel firstToken = sentenceTokens.get(0);
                CoreLabel lastToken = sentenceTokens.get(sentenceTokens.size() - 1);
                sentenceAnnotation.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
                sentenceAnnotation.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            }
            return document;
        }

        /**
         * Looks up the leaves at the two given token indices with {@code Trees.getLeaf}
         * and returns their lowest common ancestor within {@code root}.
         *
         * @param root tree to search in
         * @param startToken index passed to {@code Trees.getLeaf} for the left leaf
         * @param endToken index passed to {@code Trees.getLeaf} for the right leaf
         * @return the result of {@code Trees.getLowestCommonAncestor} for the two leaves
         */
        private static Tree getLowestCommonAncestor(Tree root, int startToken, int endToken) {
            return Trees.getLowestCommonAncestor(
                    Trees.getLeaf(root, startToken),
                    Trees.getLeaf(root, endToken),
                    root);
        }

        /**
         * Returns a non-leaf node covering the tokens from {@code startToken} to
         * {@code endToken}.
         *
         * <p>Starts from the lowest common ancestor of the two leaves. A leaf is replaced
         * by its parent; a pre-terminal is additionally replaced by its parent unless
         * {@code acceptPreTerminals} is set.
         *
         * @param root tree to search in
         * @param startToken index of the first token
         * @param endToken index of the last token
         * @param acceptPreTerminals whether a pre-terminal node may be returned
         * @return the selected node
         */
        private static Tree getTreeNonTerminal(Tree root, int startToken, int endToken, boolean acceptPreTerminals) {
            Tree ancestor = DocumentIterator.getLowestCommonAncestor(root, startToken, endToken);
            Tree node = ancestor.isLeaf() ? ancestor.parent(root) : ancestor;
            if (acceptPreTerminals || !node.isPreTerminal()) {
                return node;
            }
            return node.parent(root);
        }

        /**
         * Builds and attaches all annotations for a document that has been read.
         *
         * <p>Each sentence's rows are converted with {@code wordsToSentence}, the
         * sentences are combined with {@code sentencesToDocument}, and the result is
         * stored on the document. Then, per sentence, one chunk annotation is created
         * for every NER span and every coreference span. NER chunks are collected into
         * {@code document.nerChunks}; coreference mentions are grouped by coreference id
         * into {@code document.corefChainMap}.
         *
         * @param document the document to annotate; modified in place
         */
        public void annotateDocument(CoNLLDocument document) {
            List<CoreMap> sentences = new ArrayList<CoreMap>(document.sentenceWordLists.size());
            for (List<String[]> rows : document.sentenceWordLists) {
                sentences.add(this.wordsToSentence(rows));
            }
            document.setAnnotation(DocumentIterator.sentencesToDocument(document.documentIdPart, sentences));

            CollectionValuedMap<String, CoreMap> chains =
                    new CollectionValuedMap<String, CoreMap>(CollectionFactory.<CoreMap>arrayListFactory());
            List<CoreMap> chunks = new ArrayList<CoreMap>();

            for (int sentIdx = 0; sentIdx < sentences.size(); ++sentIdx) {
                CoreMap sentence = sentences.get(sentIdx);
                Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
                tree.setSpans();
                List<String[]> rows = document.sentenceWordLists.get(sentIdx);

                // Named-entity chunks.
                for (Triple<Integer, Integer, String> span : DocumentIterator.getNerSpans(rows)) {
                    int first = span.first();
                    int last = span.second();
                    String tag = span.third();
                    // Span ends are inclusive; the chunk helper is given last + 1.
                    Annotation chunk = ChunkAnnotationUtils.getAnnotatedChunk(sentence, first, last + 1);
                    chunk.set(CoreAnnotations.NamedEntityTagAnnotation.class, tag);
                    chunk.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
                    chunks.add(chunk);

                    Tree node = DocumentIterator.getTreeNonTerminal(tree, first, last, true);
                    // A tree node is attached only when it covers exactly the chunk's tokens.
                    boolean exactCover = node.getSpan().getSource() == first && node.getSpan().getTarget() == last;
                    if (exactCover) {
                        chunk.set(TreeCoreAnnotations.TreeAnnotation.class, node);
                        if (this.options.annotateTreeNer) {
                            Label nodeLabel = node.label();
                            if (nodeLabel instanceof CoreLabel) {
                                ((CoreLabel) nodeLabel).set(NamedEntityAnnotation.class, chunk);
                            }
                        }
                    }
                }

                // Coreference mentions, grouped by coreference id.
                for (Triple<Integer, Integer, String> span : DocumentIterator.getCorefSpans(rows)) {
                    int first = span.first();
                    int last = span.second();
                    String chainId = span.third();
                    Annotation mention = ChunkAnnotationUtils.getAnnotatedChunk(sentence, first, last + 1);
                    mention.set(CorefCoreAnnotations.CorefAnnotation.class, chainId);
                    mention.set(CoreAnnotations.SentenceIndexAnnotation.class, sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
                    chains.add(chainId, mention);

                    // Unlike NER chunks, mentions always get a tree node, exact cover or not.
                    Tree node = DocumentIterator.getTreeNonTerminal(tree, first, last, true);
                    mention.set(TreeCoreAnnotations.TreeAnnotation.class, node);
                    if (this.options.annotateTreeCoref) {
                        Label nodeLabel = node.label();
                        if (nodeLabel instanceof CoreLabel) {
                            ((CoreLabel) nodeLabel).set(CorefMentionAnnotation.class, mention);
                        }
                    }
                }
            }
            document.corefChainMap = chains;
            document.nerChunks = chunks;
        }

        /**
         * Reads lines from the underlying reader until one complete document has been
         * consumed, then annotates and returns it.
         *
         * <p>Lines are trimmed. A blank line ends the current sentence. A line starting
         * with {@code docStart} begins a new document, and a line starting with
         * {@code "#end document"} finishes it. Every other non-blank line is split with
         * {@code delimiterPattern} into at least 12 fields and appended to the current
         * sentence.
         *
         * @return the next annotated document, or {@code null} when the input ends
         *         before an end-of-document line is seen
         * @throws RuntimeException if a data line has fewer than 12 fields
         * @throws RuntimeIOException if reading from the underlying reader fails
         */
        public CoNLLDocument readNextDocument() {
            try {
                List<String[]> curSentWords = new ArrayList<String[]>();
                CoNLLDocument document = null;
                String line;
                while ((line = this.br.readLine()) != null) {
                    ++this.lineCnt;
                    line = line.trim();

                    if (line.length() == 0) {
                        // Blank line: close the sentence collected so far, if any.
                        if (curSentWords.size() > 0) {
                            assert (document != null);
                            document.addSentence(curSentWords);
                            curSentWords = new ArrayList<String[]>();
                        }
                        continue;
                    }

                    if (line.startsWith(docStart)) {
                        if (document != null) {
                            // Report the actual file name and line number of the stray marker.
                            logger.warning("Unexpected begin document at line (" + this.filename + "," + this.lineCnt + ")");
                        }
                        document = new CoNLLDocument();
                        document.filename = this.filename;
                        document.documentIdPart = line.substring(docStartLength);
                        continue;
                    }

                    if (line.startsWith("#end document")) {
                        this.annotateDocument(document);
                        ++this.docCnt;
                        return document;
                    }

                    // Regular data line: one token's columns.
                    assert (document != null);
                    String[] fields = delimiterPattern.split(line);
                    if (fields.length < 12) {
                        throw new RuntimeException("Unexpected number of field " + fields.length + ", expected >= " + 12 + " for line (" + this.filename + "," + this.lineCnt + "): " + line);
                    }
                    String curDocId = fields[0];
                    String partNo = fields[1];
                    if (document.getDocumentID() == null) {
                        // First data line of the document fixes its id and part number.
                        document.setDocumentID(curDocId);
                        document.setPartNo(partNo);
                    } else {
                        assert (document.getDocumentID().equals(curDocId));
                        assert (document.getPartNo().equals(partNo));
                    }
                    curSentWords.add(fields);
                }
            }
            catch (IOException ex) {
                throw new RuntimeIOException(ex);
            }
            return null;
        }

        /**
         * Closes the underlying reader via {@code IOUtils.closeIgnoringExceptions}.
         */
        @Override
        public void close() {
            IOUtils.closeIgnoringExceptions(br);
        }
    }

    /**
     * Holder for one document read from a CoNLL file: its identifiers, the raw
     * per-sentence rows, and the annotations derived from them.
     */
    public static class CoNLLDocument implements Serializable {
        private static final long serialVersionUID = 6287339385357914531L;

        /** Identifier text that follows the begin-document marker. */
        String documentIdPart;
        /** Document id taken from the first column of the data lines. */
        public String documentID;
        /** Part number taken from the second column of the data lines. */
        String partNo;
        /** Name of the file this document was read from. */
        public String filename;
        /** One list per sentence; each entry holds the columns of one token. */
        public List<List<String[]>> sentenceWordLists = new ArrayList<List<String[]>>();
        /** Document-level annotation, once it has been built. */
        Annotation annotation;
        /** Coreference mentions grouped by coreference id. */
        CollectionValuedMap<String, CoreMap> corefChainMap;
        /** Named-entity chunks collected across all sentences. */
        List<CoreMap> nerChunks;

        /** @return the document id, or {@code null} if none has been set */
        public String getDocumentID() {
            return documentID;
        }

        /** @param documentID the document id to store */
        public void setDocumentID(String documentID) {
            this.documentID = documentID;
        }

        /** @return the part number, or {@code null} if none has been set */
        public String getPartNo() {
            return partNo;
        }

        /** @param partNo the part number to store */
        public void setPartNo(String partNo) {
            this.partNo = partNo;
        }

        /** @return the live list of sentences (not a copy) */
        public List<List<String[]>> getSentenceWordLists() {
            return sentenceWordLists;
        }

        /**
         * Appends one sentence to the document.
         *
         * @param sentence the rows of the sentence, one {@code String[]} per token
         */
        public void addSentence(List<String[]> sentence) {
            sentenceWordLists.add(sentence);
        }

        /** @return the document annotation, or {@code null} if none has been set */
        public Annotation getAnnotation() {
            return annotation;
        }

        /** @param annotation the document annotation to store */
        public void setAnnotation(Annotation annotation) {
            this.annotation = annotation;
        }

        /** @return the coreference mentions grouped by id, or {@code null} if not set */
        public CollectionValuedMap<String, CoreMap> getCorefChainMap() {
            return corefChainMap;
        }
    }

    /**
     * Settings that control which files are read and which annotations are produced.
     */
    public static class Options {
        /** Prefix coreference token labels with S-/B-/I-/E- position markers. */
        public boolean useCorefBIOESEncoding = false;
        /** Annotate tokens with coreference labels. */
        public boolean annotateTokenCoref = true;
        /** Annotate tokens with the speaker column. */
        public boolean annotateTokenSpeaker = true;
        /** Annotate tokens with part-of-speech tags. */
        public boolean annotateTokenPos = true;
        /** Annotate tokens with named-entity tags. */
        public boolean annotateTokenNer = true;
        /** Attach coreference mentions to tree node labels. */
        public boolean annotateTreeCoref = false;
        /** Attach named-entity chunks to tree node labels. */
        public boolean annotateTreeNer = false;
        public Locale lang = Locale.ENGLISH;
        /** Named-entity tag given to tokens outside every entity span. */
        public String backgroundNerTag = "O";
        /** Regular expression source of {@link #filePattern}. */
        protected String fileFilter;
        /** Compiled form of {@link #fileFilter}. */
        protected Pattern filePattern;
        protected boolean sortFiles;

        /** Creates options with the default file filter {@code .*_gold_conll$}. */
        public Options() {
            this(".*_gold_conll$");
        }

        /**
         * Creates options with the given file filter.
         *
         * @param filter regular expression for the file filter
         */
        public Options(String filter) {
            applyFilter(filter);
        }

        /**
         * Replaces the file filter and recompiles its pattern.
         *
         * @param filter regular expression for the file filter
         */
        public void setFilter(String filter) {
            applyFilter(filter);
        }

        /** Stores the filter text, then compiles it into the pattern. */
        private void applyFilter(String filter) {
            fileFilter = filter;
            filePattern = Pattern.compile(fileFilter);
        }
    }

    /**
     * Annotation key whose value is a {@link CoreMap}; used in this file to attach a
     * coreference mention to a tree node's label.
     */
    public static class CorefMentionAnnotation implements CoreAnnotation<CoreMap> {
        /** @return {@code CoreMap.class}, the value type of this key */
        @Override
        public Class<CoreMap> getType() {
            return CoreMap.class;
        }
    }

    /**
     * Annotation key whose value is a {@link CoreMap}; used in this file to attach a
     * named-entity chunk to a tree node's label.
     */
    public static class NamedEntityAnnotation implements CoreAnnotation<CoreMap> {
        /** @return {@code CoreMap.class}, the value type of this key */
        @Override
        public Class<CoreMap> getType() {
            return CoreMap.class;
        }
    }
}

