/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import org.apache.avro.util.Utf8;
import org.apache.gora.filter.FilterOp;
import org.apache.gora.filter.MapFieldValueFilter;
import org.apache.gora.mapreduce.GoraMapper;
import org.apache.gora.mapreduce.StringComparator;
import org.apache.gora.persistency.Persistent;
import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.indexer.IndexUtil;
import org.apache.nutch.indexer.IndexWriters;
import org.apache.nutch.indexer.IndexerOutputFormat;
import org.apache.nutch.indexer.IndexingFilters;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.parse.ParseStatusUtils;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.ParseStatus;
import org.apache.nutch.storage.StorageUtils;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.ToolUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line tool and {@link NutchTool} that runs the map-only "Indexer"
 * job: every selected {@link WebPage} row is turned into a
 * {@link NutchDocument} by {@link IndexerMapper} and written through
 * {@link IndexerOutputFormat}.
 *
 * <p>Usage: {@code IndexingJob (<batchId> | -all | -reindex) [-crawlId <id>]}
 */
public class IndexingJob
extends NutchTool
implements Tool {
    public static Logger LOG = LoggerFactory.getLogger(IndexingJob.class);

    /** Configuration key used to hand the batch id from the driver to {@link IndexerMapper}. */
    private static final String BATCH_ID_KEY = "generate.batch.id";

    /** Columns this job always reads, independent of the configured filters. */
    private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();

    /** Batch id value that disables the batch-id row filter. */
    private static final Utf8 REINDEX = new Utf8("-reindex");

    static {
        FIELDS.add(WebPage.Field.SIGNATURE);
        FIELDS.add(WebPage.Field.PARSE_STATUS);
        FIELDS.add(WebPage.Field.SCORE);
        FIELDS.add(WebPage.Field.MARKERS);
    }

    /**
     * Collects the columns the job has to load: the fixed {@link #FIELDS} plus
     * whatever the configured indexing and scoring filters report.
     *
     * @param job job whose configuration is used to instantiate the filters
     * @return a fresh, caller-owned set of fields
     */
    private static Collection<WebPage.Field> getFields(Job job) {
        Configuration conf = job.getConfiguration();
        Collection<WebPage.Field> columns = new HashSet<WebPage.Field>(FIELDS);
        columns.addAll(new IndexingFilters(conf).getFields());
        columns.addAll(new ScoringFilters(conf).getFields());
        return columns;
    }

    /**
     * Configures and runs the indexing job and blocks until it finishes.
     *
     * @param args argument map; the {@code "batch"} entry holds the batch id
     *             (a batch id, {@code -all} or {@code -reindex})
     * @return the tool's {@code results} map after the job status was recorded
     * @throws Exception if the job cannot be configured or submitted
     */
    @Override
    public Map<String, Object> run(Map<String, Object> args) throws Exception {
        String batchId = (String) args.get("batch");
        Configuration conf = this.getConf();
        // Published in the configuration so that IndexerMapper.setup can read it.
        conf.set(BATCH_ID_KEY, batchId);

        Job job = new NutchJob(conf, "Indexer");
        job.getConfiguration().setClass("mapred.output.key.comparator.class", StringComparator.class, RawComparator.class);

        Collection<WebPage.Field> fields = IndexingJob.getFields(job);
        MapFieldValueFilter<String, WebPage> batchIdFilter = this.getBatchIdFilter(batchId);
        StorageUtils.initMapperJob(job, fields, String.class, NutchDocument.class, IndexerMapper.class, batchIdFilter);

        // Map-only job: documents go straight from the mapper to the output format.
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(IndexerOutputFormat.class);

        // NOTE(review): the boolean result of waitForCompletion is ignored, so a
        // failed job is not reported to the caller here - confirm whether
        // recordJobStatus is expected to carry that information.
        job.waitForCompletion(true);
        ToolUtil.recordJobStatus(null, job, this.results);
        return this.results;
    }

    /**
     * Builds the row filter that restricts the job to pages whose update-db
     * marker equals the given batch id.
     *
     * @param batchId batch id from the command line; must not be {@code null}
     * @return the filter, or {@code null} (no filtering) when {@code batchId}
     *         is {@code -reindex} or equals {@code Nutch.ALL_CRAWL_ID}
     */
    private MapFieldValueFilter<String, WebPage> getBatchIdFilter(String batchId) {
        if (batchId.equals(REINDEX.toString()) || batchId.equals(Nutch.ALL_CRAWL_ID.toString())) {
            return null;
        }
        MapFieldValueFilter<String, WebPage> filter = new MapFieldValueFilter<String, WebPage>();
        filter.setFieldName(WebPage.Field.MARKERS.toString());
        filter.setFilterOp(FilterOp.EQUALS);
        filter.setFilterIfMissing(true);
        filter.setMapKey(Mark.UPDATEDB_MARK.getName());
        filter.getOperands().add(new Utf8(batchId));
        return filter;
    }

    /**
     * Runs the indexing job for one batch and then, unless
     * {@code solr.commit.index} is set to {@code false}, issues a commit on the
     * configured index writers.
     *
     * @param batchId batch id, {@code -all} or {@code -reindex}
     * @throws Exception if the job or the commit fails
     */
    public void index(String batchId) throws Exception {
        LOG.info("IndexingJob: starting");
        this.run(ToolUtil.toArgMap("batch", batchId));

        // NOTE(review): the writers are opened but never closed in this method -
        // confirm whether IndexWriters needs an explicit close after commit.
        IndexWriters writers = new IndexWriters(this.getConf());
        LOG.info(writers.describe());
        writers.open(this.getConf());
        if (this.getConf().getBoolean("solr.commit.index", true)) {
            writers.commit();
        }
        LOG.info("IndexingJob: done.");
    }

    /**
     * Command-line entry point used by {@link ToolRunner}.
     *
     * @param args {@code (<batchId> | -all | -reindex) [-crawlId <id>]}; the
     *             crawl id is only honoured when exactly three arguments are given
     * @return {@code 0} on success, {@code -1} on bad usage or on any exception
     */
    public int run(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: IndexingJob (<batchId> | -all | -reindex) [-crawlId <id>]");
            return -1;
        }
        if (args.length == 3 && "-crawlId".equals(args[1])) {
            this.getConf().set("storage.crawl.id", args[2]);
        }
        try {
            this.index(args[0]);
            return 0;
        }
        catch (Exception e) {
            // Prefix matches the other log messages of this class.
            LOG.error("IndexingJob: " + StringUtils.stringifyException(e));
            return -1;
        }
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(NutchConfiguration.create(), new IndexingJob(), args);
        System.exit(res);
    }

    /**
     * Mapper that converts a stored {@link WebPage} into a {@link NutchDocument},
     * stamps the page with the index mark and emits the document.
     */
    public static class IndexerMapper
    extends GoraMapper<String, WebPage, String, NutchDocument> {
        public IndexUtil indexUtil;
        public DataStore<String, WebPage> store;
        /** Batch id read from the configuration in {@code setup}; defaults to {@code -all}. */
        protected Utf8 batchId;

        /**
         * Reads the batch id and creates the {@link IndexUtil} and the web store.
         *
         * @throws IOException if the store class cannot be loaded
         */
        public void setup(Mapper.Context context) throws IOException {
            Configuration conf = context.getConfiguration();
            this.batchId = new Utf8(conf.get(BATCH_ID_KEY, "-all"));
            this.indexUtil = new IndexUtil(conf);
            try {
                this.store = StorageUtils.createWebStore(conf, String.class, WebPage.class);
            }
            catch (ClassNotFoundException e) {
                throw new IOException(e);
            }
        }

        /** Closes the web store opened in {@code setup}. */
        protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
            this.store.close();
        }

        /**
         * Indexes a single page. The page is skipped when it has no successful
         * parse status, when its parse minor code is {@code 100}, when it carries
         * no update-db mark, or when {@link IndexUtil#index} returns {@code null}.
         *
         * @param key     row key; passed through {@code TableUtil.unreverseUrl} for logging
         * @param page    the stored page
         * @param context mapper context receiving the document and the counter update
         */
        public void map(String key, WebPage page, Mapper.Context context) throws IOException, InterruptedException {
            ParseStatus pstatus = page.getParseStatus();
            // NOTE(review): 100 is a constant inlined by the compiler; presumably
            // the "success, redirect" parse minor code - confirm against ParseStatusCodes.
            if (pstatus == null || !ParseStatusUtils.isSuccess(pstatus) || pstatus.getMinorCode() == 100) {
                return;
            }

            Utf8 mark = Mark.UPDATEDB_MARK.checkMark(page);
            if (mark == null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Skipping {}; not updated on db yet", TableUtil.unreverseUrl(key));
                }
                return;
            }

            NutchDocument doc = this.indexUtil.index(key, page);
            if (doc == null) {
                return;
            }

            // mark is known to be non-null here, so the page is always stamped
            // with the same batch value and written back to the store.
            Mark.INDEX_MARK.putMark(page, mark);
            this.store.put(key, page);

            context.write(key, doc);
            context.getCounter("IndexerJob", "DocumentCount").increment(1L);
        }
    }
}

