/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.DbUpdaterJob;
import org.apache.nutch.crawl.FetchSchedule;
import org.apache.nutch.crawl.FetchScheduleFactory;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.crawl.SignatureComparator;
import org.apache.nutch.crawl.UrlWithScore;
import org.apache.nutch.fetcher.FetcherJob;
import org.apache.nutch.net.protocols.HttpDateFormat;
import org.apache.nutch.scoring.ScoreDatum;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.WebPageWritable;
import org.slf4j.Logger;

public class DbUpdateReducer
extends GoraReducer<UrlWithScore, NutchWritable, String, WebPage> {
    public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";
    public static final Logger LOG = DbUpdaterJob.LOG;
    private int retryMax;
    private boolean additionsAllowed;
    private int maxInterval;
    private FetchSchedule schedule;
    private ScoringFilters scoringFilters;
    private List<ScoreDatum> inlinkedScoreData = new ArrayList<ScoreDatum>();
    private int maxLinks;

    protected void setup(Reducer.Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        this.retryMax = conf.getInt("db.fetch.retry.max", 3);
        this.additionsAllowed = conf.getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
        this.maxInterval = conf.getInt("db.fetch.interval.max", 0);
        this.schedule = FetchScheduleFactory.getFetchSchedule(conf);
        this.scoringFilters = new ScoringFilters(conf);
        this.maxLinks = conf.getInt("db.update.max.inlinks", 10000);
    }

    protected void reduce(UrlWithScore key, Iterable<NutchWritable> values, Reducer.Context context) throws IOException, InterruptedException {
        String url;
        String keyUrl = key.getUrl().toString();
        WebPage page = null;
        this.inlinkedScoreData.clear();
        for (NutchWritable nutchWritable : values) {
            Writable val = nutchWritable.get();
            if (val instanceof WebPageWritable) {
                page = ((WebPageWritable)val).getWebPage();
                continue;
            }
            this.inlinkedScoreData.add((ScoreDatum)val);
            if (this.inlinkedScoreData.size() < this.maxLinks) continue;
            LOG.info("Limit reached, skipping further inlinks for " + keyUrl);
            break;
        }
        try {
            url = TableUtil.unreverseUrl(keyUrl);
        }
        catch (Exception e) {
            return;
        }
        if (page == null) {
            if (!this.additionsAllowed) {
                return;
            }
            page = WebPage.newBuilder().build();
            this.schedule.initializeSchedule(url, page);
            page.setStatus(1);
            try {
                this.scoringFilters.initialScore(url, page);
            }
            catch (ScoringFilterException e) {
                page.setScore(Float.valueOf(0.0f));
            }
        } else {
            byte status = page.getStatus().byteValue();
            switch (status) {
                case 2: 
                case 4: 
                case 5: 
                case 38: {
                    int modified = 0;
                    if (status == 38) {
                        modified = 2;
                    }
                    ByteBuffer prevSig = page.getPrevSignature();
                    ByteBuffer signature = page.getSignature();
                    if (prevSig != null && signature != null) {
                        modified = SignatureComparator.compare(prevSig, signature) != 0 ? 1 : 2;
                    }
                    long fetchTime = page.getFetchTime();
                    long prevFetchTime = page.getPrevFetchTime();
                    long modifiedTime = page.getModifiedTime();
                    long prevModifiedTime = page.getPrevModifiedTime();
                    CharSequence lastModified = page.getHeaders().get(new Utf8("Last-Modified"));
                    if (lastModified != null) {
                        try {
                            modifiedTime = HttpDateFormat.toLong(lastModified.toString());
                            prevModifiedTime = page.getModifiedTime();
                        }
                        catch (Exception e) {
                            // empty catch block
                        }
                    }
                    this.schedule.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime, fetchTime, modifiedTime, modified);
                    if (this.maxInterval >= page.getFetchInterval()) break;
                    this.schedule.forceRefetch(url, page, false);
                    break;
                }
                case 34: {
                    this.schedule.setPageRetrySchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
                    if (page.getRetriesSinceFetch() < this.retryMax) {
                        page.setStatus(1);
                        break;
                    }
                    page.setStatus(3);
                    break;
                }
                case 3: {
                    this.schedule.setPageGoneSchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
                }
            }
        }
        if (page.getInlinks() != null) {
            page.getInlinks().clear();
        }
        int smallestDist = Integer.MAX_VALUE;
        for (ScoreDatum inlink : this.inlinkedScoreData) {
            int inlinkDist = inlink.getDistance();
            if (inlinkDist < smallestDist) {
                smallestDist = inlinkDist;
            }
            page.getInlinks().put((CharSequence)new Utf8(inlink.getUrl()), (CharSequence)new Utf8(inlink.getAnchor()));
        }
        if (smallestDist != Integer.MAX_VALUE) {
            int newDistance;
            int oldDistance = Integer.MAX_VALUE;
            CharSequence oldDistUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
            if (oldDistUtf8 != null) {
                oldDistance = Integer.parseInt(oldDistUtf8.toString());
            }
            if ((newDistance = smallestDist + 1) < oldDistance) {
                page.getMarkers().put((CharSequence)DbUpdaterJob.DISTANCE, (CharSequence)new Utf8(Integer.toString(newDistance)));
            }
        }
        try {
            this.scoringFilters.updateScore(url, page, this.inlinkedScoreData);
        }
        catch (ScoringFilterException e) {
            LOG.warn("Scoring filters failed with exception " + StringUtils.stringifyException((Throwable)e));
        }
        if (page.getMetadata().get(FetcherJob.REDIRECT_DISCOVERED) != null) {
            page.getMetadata().put((CharSequence)FetcherJob.REDIRECT_DISCOVERED, null);
        }
        Mark.GENERATE_MARK.removeMarkIfExist(page);
        Mark.FETCH_MARK.removeMarkIfExist(page);
        Utf8 parse_mark = Mark.PARSE_MARK.checkMark(page);
        if (parse_mark != null) {
            Mark.UPDATEDB_MARK.putMark(page, parse_mark);
            Mark.PARSE_MARK.removeMark(page);
        }
        context.write((Object)keyUrl, (Object)page);
    }
}

