/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import org.apache.avro.util.Utf8;
import org.apache.gora.filter.FilterOp;
import org.apache.gora.filter.MapFieldValueFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.DbUpdateMapper;
import org.apache.nutch.crawl.DbUpdateReducer;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.crawl.UrlWithScore;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.StorageUtils;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.ToolUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line / programmatic tool that configures and runs the
 * {@code "update-table"} job, wiring {@link DbUpdateMapper} and
 * {@link DbUpdateReducer} together over the web page store.
 *
 * <p>The job can be restricted to a single batch id (matched against the
 * generate mark of each page) or run over every row when the batch id is
 * {@code -all}.
 */
public class DbUpdaterJob
extends NutchTool
implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(DbUpdaterJob.class);

    /**
     * Base set of {@link WebPage} fields requested from the store; the fields
     * reported by the configured scoring filters are added per run.
     */
    private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();

    /** Utf8 constant {@code "dist"}; not referenced inside this class. */
    public static final Utf8 DISTANCE = new Utf8("dist");

    /** Usage text printed to stderr when the command line is invalid. */
    private static final String USAGE = "Usage: DbUpdaterJob (<batchId> | -all) [-crawlId <id>] "
        + "    <batchId>     - crawl identifier returned by Generator, or -all for all \n \t \t    generated batchId-s\n"
        + "    -crawlId <id> - the id to prefix the schemas to operate on, \n \t \t    (default: storage.crawl.id)\n";

    static {
        FIELDS.add(WebPage.Field.OUTLINKS);
        FIELDS.add(WebPage.Field.INLINKS);
        FIELDS.add(WebPage.Field.STATUS);
        FIELDS.add(WebPage.Field.PREV_SIGNATURE);
        FIELDS.add(WebPage.Field.SIGNATURE);
        FIELDS.add(WebPage.Field.MARKERS);
        FIELDS.add(WebPage.Field.METADATA);
        FIELDS.add(WebPage.Field.RETRIES_SINCE_FETCH);
        FIELDS.add(WebPage.Field.FETCH_TIME);
        FIELDS.add(WebPage.Field.MODIFIED_TIME);
        FIELDS.add(WebPage.Field.FETCH_INTERVAL);
        FIELDS.add(WebPage.Field.PREV_FETCH_TIME);
        FIELDS.add(WebPage.Field.PREV_MODIFIED_TIME);
        FIELDS.add(WebPage.Field.HEADERS);
    }

    /** Creates a job without a configuration; one must be set before running. */
    public DbUpdaterJob() {
    }

    /**
     * Creates a job bound to the given configuration.
     *
     * @param conf configuration stored via {@code setConf}
     */
    public DbUpdaterJob(Configuration conf) {
        this.setConf(conf);
    }

    /**
     * Configures the {@code "update-table"} job and blocks until it completes.
     *
     * <p>Recognised argument keys: {@code "crawl"} (optional crawl id, written
     * to {@code storage.crawl.id} on the job configuration when non-null) and
     * {@code "batch"} (batch id; defaults to {@code -all} when absent).
     *
     * <p>Note: the boolean returned by {@code waitForCompletion} is not
     * inspected here; the job status is only recorded into the results map.
     *
     * @param args argument map as described above
     * @return the tool's results map after the job status has been recorded
     * @throws Exception if job setup or execution fails
     */
    @Override
    public Map<String, Object> run(Map<String, Object> args) throws Exception {
        String crawlId = (String)args.get("crawl");
        String batchId = (String)args.get("batch");
        this.numJobs = 1;
        this.currentJobNum = 0;
        if (batchId == null) {
            batchId = "-all";
        }
        this.getConf().set("nutch.batch.name", batchId);

        // Read the base fields plus whatever the scoring filters need.
        ScoringFilters scoringFilters = new ScoringFilters(this.getConf());
        Collection<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
        fields.addAll(scoringFilters.getFields());

        this.currentJob = new NutchJob(this.getConf(), "update-table");
        if (crawlId != null) {
            this.currentJob.getConfiguration().set("storage.crawl.id", crawlId);
        }

        // Partition and group on the URL alone while sorting on URL + score.
        this.currentJob.setPartitionerClass(UrlWithScore.UrlOnlyPartitioner.class);
        this.currentJob.setSortComparatorClass(UrlWithScore.UrlScoreComparator.class);
        this.currentJob.setGroupingComparatorClass(UrlWithScore.UrlScoreComparator.UrlOnlyComparator.class);

        MapFieldValueFilter<String, WebPage> batchIdFilter = this.getBatchIdFilter(batchId);
        StorageUtils.initMapperJob(this.currentJob, fields, UrlWithScore.class, NutchWritable.class, DbUpdateMapper.class, batchIdFilter);
        StorageUtils.initReducerJob(this.currentJob, DbUpdateReducer.class);
        this.currentJob.waitForCompletion(true);
        ToolUtil.recordJobStatus(null, this.currentJob, this.results);
        return this.results;
    }

    /**
     * Builds the store-side filter that selects pages whose generate mark
     * equals the given batch id.
     *
     * @param batchId batch id to match; must not be {@code null}
     * @return {@code null} when {@code batchId} equals
     *         {@code Nutch.ALL_CRAWL_ID} (no filtering), otherwise an
     *         {@code EQUALS} filter on the markers map entry keyed by
     *         {@code Mark.GENERATE_MARK}
     */
    private MapFieldValueFilter<String, WebPage> getBatchIdFilter(String batchId) {
        if (batchId.equals(Nutch.ALL_CRAWL_ID.toString())) {
            return null;
        }
        MapFieldValueFilter<String, WebPage> filter = new MapFieldValueFilter<String, WebPage>();
        filter.setFieldName(WebPage.Field.MARKERS.toString());
        filter.setFilterOp(FilterOp.EQUALS);
        // NOTE(review): presumably excludes rows that carry no generate mark at all - confirm against Gora docs.
        filter.setFilterIfMissing(true);
        filter.setMapKey(Mark.GENERATE_MARK.getName());
        filter.getOperands().add(new Utf8(batchId));
        return filter;
    }

    /**
     * Runs the update job for the given crawl/batch and logs start time,
     * finish time and elapsed time.
     *
     * @param crawlId crawl id forwarded under the {@code "crawl"} key; may be {@code null}
     * @param batchId batch id forwarded under the {@code "batch"} key, or {@code -all}
     * @return always {@code 0}; failures surface only as exceptions
     * @throws Exception if the underlying job throws
     */
    private int updateTable(String crawlId, String batchId) throws Exception {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        LOG.info("DbUpdaterJob: starting at {}", sdf.format(start));
        if (batchId.equals("-all")) {
            LOG.info("DbUpdaterJob: updating all");
        } else {
            LOG.info("DbUpdaterJob: batchId: {}", batchId);
        }
        this.run(ToolUtil.toArgMap("crawl", crawlId, "batch", batchId));
        long finish = System.currentTimeMillis();
        LOG.info("DbUpdaterJob: finished at {}, time elapsed: {}", sdf.format(finish), TimingUtil.elapsedTime(start, finish));
        return 0;
    }

    /**
     * Command-line entry point: {@code (<batchId> | -all) [-crawlId <id>]}.
     *
     * <p>The first argument is the batch id. Any other value starting with
     * {@code -} in that position is rejected. Each {@code -crawlId <id>} pair
     * is written directly to {@code storage.crawl.id} on this tool's
     * configuration.
     *
     * @param args raw command-line arguments
     * @return {@code -1} after printing usage when the arguments are missing,
     *         the batch id is malformed, or {@code -crawlId} has no value;
     *         otherwise the result of the update run
     * @throws IllegalArgumentException if an unrecognised option is supplied
     * @throws Exception if the underlying job throws
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length == 0) {
            System.err.println(USAGE);
            return -1;
        }
        String batchId = args[0];
        if (!batchId.equals("-all") && batchId.startsWith("-")) {
            System.err.println(USAGE);
            return -1;
        }
        for (int i = 1; i < args.length; ++i) {
            if (!"-crawlId".equals(args[i])) {
                throw new IllegalArgumentException("arg " + args[i] + " not recognized");
            }
            // Guard against a trailing "-crawlId" with no value, which would
            // otherwise fail with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println(USAGE);
                return -1;
            }
            this.getConf().set("storage.crawl.id", args[++i]);
        }
        // The crawl id travels via the configuration set above, so no explicit
        // crawl id is passed down.
        return this.updateTable(null, batchId);
    }

    /**
     * Launches the tool through {@link ToolRunner} and exits the JVM with its
     * return code.
     *
     * @param args raw command-line arguments
     * @throws Exception if the tool throws
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new DbUpdaterJob(), (String[])args);
        System.exit(res);
    }
}

