/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer;

import java.nio.ByteBuffer;
import java.util.List;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilters;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.parse.ParseStatusUtils;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParserJob;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatusUtils;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class IndexingFiltersChecker
extends Configured
implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(IndexingFiltersChecker.class);
    Configuration conf;

    public int run(String[] args) throws Exception {
        String contentType = null;
        String url = null;
        String usage = "Usage: IndexingFiltersChecker <url>";
        if (args.length != 1) {
            System.err.println(usage);
            return -1;
        }
        url = URLUtil.toASCII(args[0]);
        if (LOG.isInfoEnabled()) {
            LOG.info("fetching: " + url);
        }
        IndexingFilters indexers = new IndexingFilters(this.conf);
        ProtocolFactory factory = new ProtocolFactory(this.conf);
        Protocol protocol = factory.getProtocol(url);
        WebPage page = WebPage.newBuilder().build();
        page.setBaseUrl((CharSequence)new Utf8(url));
        ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
        page.setProtocolStatus(protocolOutput.getStatus());
        if (protocolOutput.getStatus().getCode() != 1) {
            LOG.error("Fetch failed with protocol status: " + ProtocolStatusUtils.getName(protocolOutput.getStatus().getCode()) + ": " + ProtocolStatusUtils.getMessage(protocolOutput.getStatus()));
            return -1;
        }
        page.setStatus(2);
        page.setFetchTime(System.currentTimeMillis());
        Content content = protocolOutput.getContent();
        if (content == null) {
            LOG.warn("No content for " + url);
            return 0;
        }
        page.setContent(ByteBuffer.wrap(content.getContent()));
        contentType = content.getContentType();
        if (contentType == null) {
            return -1;
        }
        page.setContentType((CharSequence)new Utf8(contentType));
        if (LOG.isInfoEnabled()) {
            LOG.info("parsing: " + url);
            LOG.info("contentType: " + contentType);
        }
        if (ParserJob.isTruncated(url, page)) {
            LOG.warn("Content is truncated, parse may fail!");
        }
        new ParseUtil(this.conf).process(url, page);
        if (!ParseStatusUtils.isSuccess(page.getParseStatus())) {
            LOG.warn("Problem with parse - check log");
            return -1;
        }
        NutchDocument doc = new NutchDocument();
        doc.add("id", url);
        doc.add("digest", StringUtil.toHexString(page.getSignature()));
        try {
            doc = indexers.filter(doc, url, page);
        }
        catch (IndexingException e) {
            e.printStackTrace();
        }
        if (doc == null) {
            LOG.info("Document discarded by indexing filter");
            return 0;
        }
        for (String fname : doc.getFieldNames()) {
            List<String> values = doc.getFieldValues(fname);
            if (values == null) continue;
            for (String value : values) {
                String str = value.toString();
                int minText = Math.min(100, str.length());
                System.out.println(fname + " :\t" + str.substring(0, minText));
            }
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new IndexingFiltersChecker(), (String[])args);
        System.exit(res);
    }

    public Configuration getConf() {
        return this.conf;
    }

    public void setConf(Configuration arg0) {
        this.conf = arg0;
    }
}

