/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.AbstractUnknownWordModelTrainer;
import edu.stanford.nlp.parser.lexparser.ChineseLexicon;
import edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.ChineseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.UnknownGTTrainer;
import edu.stanford.nlp.parser.lexparser.UnknownWordModel;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.Map;
import java.util.Set;

/**
 * Trainer that accumulates the statistics consumed by a
 * {@link ChineseUnknownWordModel}.
 *
 * <p>For every training token it records, per tag, how often each "first
 * character key" was seen (see {@link #train}), and optionally feeds the token
 * to an {@link UnknownGTTrainer}. {@link #finishTraining()} converts the
 * per-tag counts into log relative frequencies stored in the map that was
 * handed to the model at construction time.
 *
 * <p>Usage order is {@code initializeTraining}, any number of {@code train}
 * calls, then {@code finishTraining}; all mutable state is created in
 * {@code initializeTraining}.
 */
public class ChineseUnknownWordModelTrainer
extends AbstractUnknownWordModelTrainer {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ChineseUnknownWordModelTrainer.class);

    /** Pseudo first-character key given a count of one under every tag in {@link #finishTraining()}. */
    private static final String UNKNOWN_FIRST = "UNK";
    /** String used in the word or tag slot of the {@link IntTaggedWord} counter keys built in {@link #train}. */
    private static final String WILDCARD = ".*.";

    /** Weighted count per word (keyed as word + wildcard tag). */
    private ClassicCounter<IntTaggedWord> seenCounter;
    /** Weighted count per tag (and overall, under {@link #iTotal}) of tokens whose word count was still below 2. */
    private ClassicCounter<IntTaggedWord> unSeenCounter;
    /** tag -> (first-character key -> weighted count). */
    private Map<Label, ClassicCounter<String>> c;
    /** tag -> total weighted count. */
    private ClassicCounter<Label> tc;
    private boolean useFirst;
    private boolean useGT;
    private boolean useUnicodeType;
    /** tag -> (first-character key -> log relative frequency); shared with the model. */
    private Map<Label, ClassicCounter<String>> tagHash;
    /** Every first-character key observed during training; shared with the model. */
    private Set<String> seenFirst;
    /** Threshold compared against the inherited {@code treesRead} before unseen-word counting starts. */
    private double indexToStartUnkCounting;
    /** Non-null only when {@link #useGT} is set. */
    private UnknownGTTrainer unknownGTTrainer;
    /** Key under which the grand total of {@link #unSeenCounter} is kept. */
    private final IntTaggedWord iTotal = new IntTaggedWord(-1, -1);
    private UnknownWordModel model;

    /**
     * Resets all counters and builds the {@link ChineseUnknownWordModel} that
     * {@link #finishTraining()} will later return.
     *
     * <p>Good-Turing smoothing is enabled when
     * {@code op.lexOptions.useUnknownWordSignatures == 0}, or when the lexicon
     * (if it is a {@link ChineseLexicon}) or otherwise the treebank parameters
     * (if they are {@link ChineseTreebankParserParams}) request it; in the
     * latter case the first-character model is switched off.
     *
     * @param op         parser options; {@code lexOptions}, {@code tlpParams} and {@code trainOptions} are read
     * @param lex        lexicon passed through to the superclass and the model
     * @param wordIndex  word index passed through to the superclass and the model
     * @param tagIndex   tag index passed through to the superclass and the model
     * @param totalTrees scaled by {@code op.trainOptions.fractionBeforeUnseenCounting} to obtain the
     *                   point after which unseen-word statistics are collected
     */
    @Override
    public void initializeTraining(Options op, Lexicon lex, Index<String> wordIndex, Index<String> tagIndex, double totalTrees) {
        super.initializeTraining(op, lex, wordIndex, tagIndex, totalTrees);

        boolean useGoodTuringUnknownWordModel = false;
        this.useFirst = true;
        this.useGT = op.lexOptions.useUnknownWordSignatures == 0;
        if (lex instanceof ChineseLexicon) {
            useGoodTuringUnknownWordModel = ((ChineseLexicon)lex).useGoodTuringUnknownWordModel;
        } else if (op.tlpParams instanceof ChineseTreebankParserParams) {
            useGoodTuringUnknownWordModel = ((ChineseTreebankParserParams)op.tlpParams).useGoodTuringUnknownWordModel;
        }
        if (useGoodTuringUnknownWordModel) {
            // An explicit Good-Turing request overrides the first-character model.
            this.useGT = true;
            this.useFirst = false;
        }
        this.useUnicodeType = op.lexOptions.useUnicodeType;

        if (this.useFirst) {
            log.info("ChineseUWM: treating unknown word as the average of their equivalents by first-character identity. useUnicodeType: " + this.useUnicodeType);
        }
        if (this.useGT) {
            log.info("ChineseUWM: using Good-Turing smoothing for unknown words.");
        }

        this.c = Generics.newHashMap();
        this.tc = new ClassicCounter<Label>();
        this.unSeenCounter = new ClassicCounter<IntTaggedWord>();
        this.seenCounter = new ClassicCounter<IntTaggedWord>();
        this.seenFirst = Generics.newHashSet();
        this.tagHash = Generics.newHashMap();
        this.indexToStartUnkCounting = totalTrees * op.trainOptions.fractionBeforeUnseenCounting;

        this.unknownGTTrainer = this.useGT ? new UnknownGTTrainer() : null;
        Map<String, Float> unknownGT = this.useGT ? this.unknownGTTrainer.unknownGT : null;

        // The model keeps references to unSeenCounter, tagHash, unknownGT and seenFirst,
        // so everything accumulated later by train()/finishTraining() is visible to it.
        this.model = new ChineseUnknownWordModel(op, lex, wordIndex, tagIndex, this.unSeenCounter, this.tagHash, unknownGT, this.useGT, this.seenFirst);
    }

    /**
     * Adds one tagged token to the statistics.
     *
     * <p>The first-character key is the first {@code char} of the word, unless
     * {@code useUnicodeType} is set and that char's
     * {@link Character#getType(char)} is not {@link Character#OTHER_LETTER}, in
     * which case the decimal type code is used as the key instead.
     *
     * @param tw     the tagged word; its word must be non-empty
     *               ({@code substring(0, 1)} throws otherwise)
     * @param loc    not used by this trainer
     * @param weight amount added to every counter touched
     */
    @Override
    public void train(TaggedWord tw, int loc, double weight) {
        if (this.useGT) {
            this.unknownGTTrainer.train(tw, weight);
        }

        String word = tw.word();
        String tag = tw.tag();
        Label tagL = new Tag(tag);

        // NOTE(review): this is a UTF-16 code unit, not a code point; a word starting
        // with a supplementary character yields a lone surrogate as key. Left as is
        // because the consuming model presumably derives its lookup key the same way.
        String first = word.substring(0, 1);
        if (this.useUnicodeType) {
            int type = Character.getType(word.charAt(0));
            if (type != Character.OTHER_LETTER) {
                // Anything that is not an "other letter" is collapsed to its type code.
                first = Integer.toString(type);
            }
        }

        ClassicCounter<String> firstCounts = this.c.get(tagL);
        if (firstCounts == null) {
            firstCounts = new ClassicCounter<String>();
            this.c.put(tagL, firstCounts);
        }
        firstCounts.incrementCount(first, weight);
        this.tc.incrementCount(tagL, weight);
        this.seenFirst.add(first);

        IntTaggedWord iW = new IntTaggedWord(word, WILDCARD, this.wordIndex, this.tagIndex);
        this.seenCounter.incrementCount(iW, weight);
        // Past the threshold, a word whose accumulated count is still below 2
        // contributes to the per-tag and overall unseen counts.
        if (this.treesRead > this.indexToStartUnkCounting && this.seenCounter.getCount(iW) < 2.0) {
            IntTaggedWord iT = new IntTaggedWord(WILDCARD, tag, this.wordIndex, this.tagIndex);
            this.unSeenCounter.incrementCount(iT, weight);
            this.unSeenCounter.incrementCount(this.iTotal, weight);
        }
    }

    /**
     * Converts the accumulated counts into log relative frequencies and returns
     * the model created in {@link #initializeTraining}.
     *
     * <p>For each tag, the tag total is incremented by one and the
     * {@code "UNK"} key is set to a count of one before
     * {@code log(count(first) / total(tag))} is written into the shared
     * {@code tagHash}. Because the tag totals are incremented on every call,
     * calling this method more than once changes the resulting values.
     *
     * @return the unknown word model backed by this trainer's statistics
     */
    @Override
    public UnknownWordModel finishTraining() {
        if (this.useGT) {
            this.unknownGTTrainer.finishTraining();
        }

        for (Map.Entry<Label, ClassicCounter<String>> entry : this.c.entrySet()) {
            Label tagLab = entry.getKey();
            ClassicCounter<String> wc = entry.getValue();

            ClassicCounter<String> logProbs = this.tagHash.get(tagLab);
            if (logProbs == null) {
                logProbs = new ClassicCounter<String>();
                this.tagHash.put(tagLab, logProbs);
            }

            // The unknown first character is treated as seen once under every tag.
            this.tc.incrementCount(tagLab);
            wc.setCount(UNKNOWN_FIRST, 1.0);

            double tagTotal = this.tc.getCount(tagLab);
            for (String first : wc.keySet()) {
                logProbs.setCount(first, Math.log(wc.getCount(first) / tagTotal));
            }
        }
        return this.model;
    }
}

