/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.french.pipeline;

import edu.stanford.nlp.international.french.pipeline.MWEPreprocessor;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.international.french.FrenchXMLTreeReaderFactory;
import edu.stanford.nlp.trees.treebank.AbstractDataset;
import edu.stanford.nlp.trees.treebank.DefaultMapper;
import edu.stanford.nlp.trees.tregex.TregexParseException;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.DataFilePaths;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Properties;
import java.util.Set;

/**
 * Dataset builder that loads French treebank XML files into a
 * {@link MemoryTreebank}, pre-processes multi-word expressions, filters the
 * trees, and writes the surviving trees (and optionally their flattened
 * yields) to UTF-8 output files.
 *
 * <p>Recognised options in addition to those handled by
 * {@link AbstractDataset#setOptions(Properties)}:
 * <ul>
 *   <li>{@code SPLIT} - path of a file listing, one per line, the IDs of the
 *       trees to keep;</li>
 *   <li>{@code CC_TAGSET} - boolean passed to
 *       {@link FrenchXMLTreeReaderFactory} (defaults to {@code false}).</li>
 * </ul>
 */
public class FTBDataset
extends AbstractDataset {
    private static final Redwood.RedwoodChannels log = Redwood.channels(FTBDataset.class);
    // Forwarded to FrenchXMLTreeReaderFactory; overwritten by setOptions().
    private boolean CC_TAGSET = false;
    // IDs of the trees to keep; null means "keep every tree".
    private Set<String> splitSet;

    /**
     * Creates a dataset backed by an empty UTF-8 {@link MemoryTreebank} that
     * reads files with the {@code xml} extension.
     */
    public FTBDataset() {
        this.treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(this.CC_TAGSET), "UTF-8");
        this.treeFileExtension = "xml";
    }

    /**
     * Builds the ID used to look a tree up in the split set:
     * {@code <document ID without its extension>-<sentence ID>}.
     *
     * @param t tree whose root label carries the document and sentence IDs
     * @return the composed tree ID
     * @throws IllegalArgumentException if the root label is not a {@link CoreLabel}
     * @throws NullPointerException if the document ID or sentence ID is missing
     */
    private String getCanditoTreeID(Tree t) {
        if (!(t.label() instanceof CoreLabel)) {
            throw new IllegalArgumentException("Trees constructed without CoreLabels! Can't extract metadata!");
        }
        CoreLabel label = (CoreLabel)t.label();
        String fileName = label.docID();
        String ftbID = (String)label.get(CoreAnnotations.SentenceIDAnnotation.class);
        // Validate before touching fileName so that a missing ID produces the
        // descriptive message instead of a bare NullPointerException.
        if (fileName == null || ftbID == null) {
            throw new NullPointerException("fileName " + fileName + ", ftbID " + ftbID);
        }
        // Strip the extension only when there is one; a document ID without a
        // '.' is used as-is rather than failing in substring(0, -1).
        int extensionStart = fileName.lastIndexOf('.');
        if (extensionStart >= 0) {
            fileName = fileName.substring(0, extensionStart);
        }
        return fileName + "-" + ftbID;
    }

    /**
     * Loads every configured path into the treebank, pre-processes the trees,
     * and writes the ones that survive filtering to {@code outFileName} (one
     * tree per line). When {@code makeFlatFile} is set, the flattened form of
     * each written tree is also written to {@code flatFileName}.
     *
     * <p>A tree is skipped when it matches one of the punctuation-only
     * patterns below, or when a split set is configured and does not contain
     * the tree's ID. Failures to open the output files or to compile the
     * patterns are reported on {@code System.err}; they are not rethrown.
     */
    @Override
    public void build() {
        for (File path : this.pathsToData) {
            this.treebank.loadPath(path, this.treeFileExtension, false);
        }
        PrintWriter outfile = null;
        PrintWriter flatFile = null;
        try {
            outfile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.outFileName), "UTF-8")));
            if (this.makeFlatFile) {
                flatFile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(this.flatFileName), "UTF-8")));
            }
            this.outputFileList.add(this.outFileName);
            if (this.makeFlatFile) {
                this.outputFileList.add(this.flatFileName);
                this.toStringBuffer.append(" Made flat files\n");
            }
            this.preprocessMWEs();

            // Sentences made up solely of one, two, or four PUNC children.
            ArrayList<TregexPattern> badTrees = new ArrayList<TregexPattern>();
            badTrees.add(TregexPattern.compile("@SENT <: @PUNC"));
            badTrees.add(TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __"));
            badTrees.add(TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC <3 @PUNC <4 @PUNC !<5 __"));

            for (Tree t : this.treebank) {
                if (FTBDataset.matchesAny(t, badTrees)) {
                    log.info("Discarding tree: " + t.toString());
                    continue;
                }
                if (this.splitSet != null && !this.splitSet.contains(this.getCanditoTreeID(t))) {
                    continue;
                }
                if (this.customTreeVisitor != null) {
                    this.customTreeVisitor.visitTree(t);
                }
                outfile.println(t.toString());
                if (!this.makeFlatFile) {
                    continue;
                }
                String flatString = ATBTreeUtils.flattenTree(t);
                if (this.removeEscapeTokens) {
                    flatString = ATBTreeUtils.unEscape(flatString);
                }
                flatFile.println(flatString);
            }
        }
        catch (UnsupportedEncodingException e) {
            System.err.printf("%s: Filesystem does not support UTF-8 output%n", this.getClass().getName());
            e.printStackTrace();
        }
        catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open %s for writing%n", this.getClass().getName(), this.outFileName);
            // Either output file may have failed to open; print the exception
            // as well so the report is not limited to the main output name.
            e.printStackTrace();
        }
        catch (TregexParseException e) {
            System.err.printf("%s: Could not compile Tregex expressions%n", this.getClass().getName());
            e.printStackTrace();
        }
        finally {
            if (outfile != null) {
                outfile.close();
            }
            if (flatFile != null) {
                flatFile.close();
            }
        }
    }

    /** Returns true as soon as one of {@code patterns} is found in {@code t}. */
    private static boolean matchesAny(Tree t, Iterable<TregexPattern> patterns) {
        for (TregexPattern pattern : patterns) {
            if (pattern.matcher(t).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs the two-pass multi-word-expression pre-processing over the loaded
     * treebank: the first pass collects counts from every tree, the second
     * hands each tree to {@link MWEPreprocessor#traverseAndFix} together with
     * the preterminal/label and unigram-tagger counters.
     */
    private void preprocessMWEs() {
        TwoDimensionalCounter<String, String> labelTerm = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> termLabel = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> labelPreterm = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> pretermLabel = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> unigramTagger = new TwoDimensionalCounter<String, String>();
        // Pass 1: gather statistics over the whole treebank before any tree is modified.
        for (Tree t : this.treebank) {
            MWEPreprocessor.countMWEStatistics(t, unigramTagger, labelPreterm, pretermLabel, labelTerm, termLabel);
        }
        // Pass 2: only two of the counters are consumed here.
        for (Tree t : this.treebank) {
            MWEPreprocessor.traverseAndFix(t, pretermLabel, unigramTagger);
        }
    }

    /**
     * Applies the base-class options, then the options specific to this
     * dataset ({@code SPLIT}, {@code CC_TAGSET}), recreates the treebank with
     * the resulting tag-set flag, and sets up the lexical and POS mappers.
     *
     * @param opts dataset configuration
     * @return the value returned by {@link AbstractDataset#setOptions(Properties)}
     */
    @Override
    public boolean setOptions(Properties opts) {
        boolean ret = super.setOptions(opts);
        if (opts.containsKey("SPLIT")) {
            String splitFileName = opts.getProperty("SPLIT");
            this.splitSet = this.makeSplitSet(splitFileName);
        }
        this.CC_TAGSET = PropertiesUtils.getBool(opts, "CC_TAGSET", false);
        // The constructor built the treebank with the default flag; rebuild it
        // now that the configured value is known.
        this.treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(this.CC_TAGSET), "UTF-8");
        if (this.lexMapper == null) {
            this.lexMapper = new DefaultMapper();
            this.lexMapper.setup(null, this.lexMapOptions.split(","));
        }
        if (this.pathsToMappings.size() != 0) {
            if (this.posMapper == null) {
                this.posMapper = new DefaultMapper();
            }
            for (File path : this.pathsToMappings) {
                this.posMapper.setup(path, new String[0]);
            }
        }
        return ret;
    }

    /**
     * Reads the split file and returns its lines, trimmed, as a set of tree IDs.
     *
     * <p>Read failures are reported on {@code System.err} and whatever was read
     * up to that point is returned; note that {@link #build()} skips every tree
     * whose ID is not in the returned set.
     *
     * @param splitFileName path of the split file; resolved through
     *                      {@link DataFilePaths#convert(String)}
     * @return the IDs read from the file (possibly empty, never {@code null})
     */
    private Set<String> makeSplitSet(String splitFileName) {
        splitFileName = DataFilePaths.convert(splitFileName);
        Set<String> splitSet = Generics.newHashSet();
        LineNumberReader reader = null;
        try {
            reader = new LineNumberReader(new FileReader(splitFileName));
            String line;
            while ((line = reader.readLine()) != null) {
                splitSet.add(line.trim());
            }
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            // reader is non-null here: only readLine() can raise a plain IOException.
            System.err.printf("%s: Error reading %s (line %d)%n", this.getClass().getName(), splitFileName, reader.getLineNumber());
            e.printStackTrace();
        }
        finally {
            // Close on every path so a failed read does not leak the file handle.
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException e) {
                    System.err.printf("%s: Error closing %s%n", this.getClass().getName(), splitFileName);
                    e.printStackTrace();
                }
            }
        }
        return splitSet;
    }
}

