/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.nutch.crawl.GeneratorJob;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.URLUtil;

public class GeneratorReducer
extends GoraReducer<GeneratorJob.SelectorEntry, WebPage, String, WebPage> {
    private long limit;
    private long maxCount;
    protected static long count = 0L;
    private boolean byDomain = false;
    private Map<String, Integer> hostCountMap = new HashMap<String, Integer>();
    private Utf8 batchId;

    protected void reduce(GeneratorJob.SelectorEntry key, Iterable<WebPage> values, Reducer.Context context) throws IOException, InterruptedException {
        for (WebPage page : values) {
            if (count >= this.limit) {
                return;
            }
            if (this.maxCount > 0L) {
                String hostordomain = this.byDomain ? URLUtil.getDomainName(key.url) : URLUtil.getHost(key.url);
                Integer hostCount = this.hostCountMap.get(hostordomain);
                if (hostCount == null) {
                    this.hostCountMap.put(hostordomain, 0);
                    hostCount = 0;
                }
                if ((long)hostCount.intValue() >= this.maxCount) {
                    return;
                }
                this.hostCountMap.put(hostordomain, hostCount + 1);
            }
            Mark.GENERATE_MARK.putMark(page, this.batchId);
            page.setBatchId((CharSequence)this.batchId);
            try {
                context.write((Object)TableUtil.reverseUrl(key.url), (Object)page);
            }
            catch (MalformedURLException e) {
                context.getCounter("Generator", "MALFORMED_URL").increment(1L);
                continue;
            }
            context.getCounter("Generator", "GENERATE_MARK").increment(1L);
            ++count;
        }
    }

    protected void setup(Reducer.Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        long totalLimit = conf.getLong("generate.topN", Long.MAX_VALUE);
        this.limit = totalLimit == Long.MAX_VALUE ? Long.MAX_VALUE : totalLimit / (long)context.getNumReduceTasks();
        this.maxCount = conf.getLong("generate.max.count", -2L);
        this.batchId = new Utf8(conf.get("generate.batch.id"));
        String countMode = conf.get("generate.count.mode", "host");
        if (countMode.equals("domain")) {
            this.byDomain = true;
        }
    }
}

