/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.IOException;
import java.net.MalformedURLException;
import org.apache.gora.mapreduce.GoraMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.nutch.crawl.DbUpdaterJob;
import org.apache.nutch.crawl.FetchSchedule;
import org.apache.nutch.crawl.FetchScheduleFactory;
import org.apache.nutch.crawl.GeneratorJob;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;

/**
 * Mapper of the generate step: decides, per stored page, whether the page is
 * emitted as a candidate for fetching.
 *
 * <p>A page is emitted only if it passes every check in {@link #map}, in this
 * order: it carries no generate mark, it is within the configured maximum
 * distance (when that limit is enabled), its URL survives normalization and
 * filtering (when enabled), and the fetch schedule says it should be fetched.
 * Emitted records are keyed by a {@link GeneratorJob.SelectorEntry} holding the
 * URL and its sort score.
 */
public class GeneratorMapper
        extends GoraMapper<String, WebPage, GeneratorJob.SelectorEntry, WebPage> {
    /** URL filters; only initialised when {@link #filter} is true. */
    private URLFilters filters;
    /** URL normalizers; only initialised when {@link #normalise} is true. */
    private URLNormalizers normalizers;
    /** Value of "generate.filter" (defaults to true). */
    private boolean filter;
    /** Value of "generate.normalise" (defaults to true). */
    private boolean normalise;
    private FetchSchedule schedule;
    private ScoringFilters scoringFilters;
    /** Value of "generate.curTime"; falls back to the wall clock read in {@link #setup}. */
    private long curTime;
    /** Output key instance, overwritten and reused for every emitted record. */
    private final GeneratorJob.SelectorEntry entry = new GeneratorJob.SelectorEntry();
    /** Value of "generate.max.distance"; any value of -1 or below disables the distance check. */
    private int maxDistance;

    /**
     * Applies the generate-time checks to one page and writes it to the
     * context if it passes all of them.
     *
     * @param reversedUrl row key; converted with {@link TableUtil#unreverseUrl}
     * @param page        the stored page for that key
     * @param context     receives the (selector entry, page) pair for accepted pages
     * @throws IOException           propagated from {@code context.write}
     * @throws InterruptedException  propagated from {@code context.write}
     * @throws NumberFormatException if the distance check is enabled and the
     *                               page's distance marker is not a valid integer
     */
    public void map(String reversedUrl, WebPage page, Mapper.Context context) throws IOException, InterruptedException {
        String url = TableUtil.unreverseUrl(reversedUrl);

        if (Mark.GENERATE_MARK.checkMark(page) != null) {
            GeneratorJob.LOG.debug("Skipping {}; already generated", url);
            return;
        }

        if (this.exceedsMaxDistance(page)) {
            return;
        }

        try {
            if (this.normalise) {
                // From here on the normalized form is what gets filtered,
                // scheduled, scored and emitted.
                url = this.normalizers.normalize(url, "generate_host_count");
            }
            if (this.filter && this.filters.filter(url) == null) {
                // A null result from the filters means the URL was rejected.
                return;
            }
        }
        catch (URLFilterException e) {
            GeneratorJob.LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
            return;
        }
        catch (MalformedURLException e) {
            GeneratorJob.LOG.warn("Couldn't filter url: {} ({})", url, e.getMessage());
            return;
        }

        if (!this.schedule.shouldFetch(url, page, this.curTime)) {
            if (GeneratorJob.LOG.isDebugEnabled()) {
                GeneratorJob.LOG.debug("-shouldFetch rejected '" + url + "', fetchTime=" + page.getFetchTime() + ", curTime=" + this.curTime);
            }
            return;
        }

        float score = page.getScore().floatValue();
        try {
            score = this.scoringFilters.generatorSortValue(url, page, score);
        }
        catch (ScoringFilterException e) {
            // Best effort: the page is still emitted with its stored score,
            // but the failure is reported instead of being silently dropped.
            GeneratorJob.LOG.warn("Couldn't compute generator sort value for url: {} ({})", url, e.getMessage());
        }

        this.entry.set(url, score);
        context.write(this.entry, page);
    }

    /**
     * Tells whether the page's distance marker is above the configured limit.
     *
     * @return false when the limit is disabled or the page has no distance
     *         marker; otherwise whether the parsed distance is greater than
     *         {@link #maxDistance}
     */
    private boolean exceedsMaxDistance(WebPage page) {
        if (this.maxDistance <= -1) {
            return false;
        }
        CharSequence distanceUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
        if (distanceUtf8 == null) {
            return false;
        }
        return Integer.parseInt(distanceUtf8.toString()) > this.maxDistance;
    }

    /**
     * Reads the generate settings from the job configuration and creates the
     * filters, normalizers, fetch schedule and scoring filters used by
     * {@link #map}.
     *
     * @param context supplies the job {@link Configuration}
     */
    public void setup(Mapper.Context context) {
        Configuration conf = context.getConfiguration();

        this.filter = conf.getBoolean("generate.filter", true);
        if (this.filter) {
            this.filters = new URLFilters(conf);
        }

        this.normalise = conf.getBoolean("generate.normalise", true);
        if (this.normalise) {
            this.normalizers = new URLNormalizers(conf, "generate_host_count");
        }

        this.maxDistance = conf.getInt("generate.max.distance", -1);
        this.curTime = conf.getLong("generate.curTime", System.currentTimeMillis());
        this.schedule = FetchScheduleFactory.getFetchSchedule(conf);
        this.scoringFilters = new ScoringFilters(conf);
    }
}

