/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse;

import java.nio.ByteBuffer;
import java.util.Map;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParserJob;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatusUtils;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ParserChecker
implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(ParserChecker.class);
    private Configuration conf;

    public int run(String[] args) throws Exception {
        Parse parse;
        WebPage page;
        ProtocolFactory factory;
        Protocol protocol;
        ProtocolOutput protocolOutput;
        boolean dumpText = false;
        boolean force = false;
        String contentType = null;
        String url = null;
        String usage = "Usage: ParserChecker [-dumpText] [-forceAs mimeType] url";
        if (args.length == 0) {
            LOG.error(usage);
            return -1;
        }
        for (int i = 0; i < args.length; ++i) {
            if (args[i].equals("-forceAs")) {
                force = true;
                contentType = args[++i];
                continue;
            }
            if (args[i].equals("-dumpText")) {
                dumpText = true;
                continue;
            }
            if (i != args.length - 1) {
                LOG.error(usage);
                System.exit(-1);
                continue;
            }
            url = URLUtil.toASCII(args[i]);
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("fetching: " + url);
        }
        if (!(protocolOutput = (protocol = (factory = new ProtocolFactory(this.conf)).getProtocol(url)).getProtocolOutput(url, page = WebPage.newBuilder().build())).getStatus().isSuccess()) {
            LOG.error("Fetch failed with protocol status: " + ProtocolStatusUtils.getName(protocolOutput.getStatus().getCode()) + ": " + ProtocolStatusUtils.getMessage(protocolOutput.getStatus()));
            return -1;
        }
        Content content = protocolOutput.getContent();
        if (content == null) {
            LOG.error("No content for " + url);
            return -1;
        }
        page.setBaseUrl((CharSequence)new Utf8(url));
        page.setContent(ByteBuffer.wrap(content.getContent()));
        if (force) {
            content.setContentType(contentType);
        } else {
            contentType = content.getContentType();
        }
        if (contentType == null) {
            LOG.error("Failed to determine content type!");
            return -1;
        }
        page.setContentType((CharSequence)new Utf8(contentType));
        if (ParserJob.isTruncated(url, page)) {
            LOG.warn("Content is truncated, parse may fail!");
        }
        if ((parse = new ParseUtil(this.conf).parse(url, page)) == null) {
            LOG.error("Problem with parse - check log");
            return -1;
        }
        byte[] signature = SignatureFactory.getSignature(this.getConf()).calculate(page);
        if (LOG.isInfoEnabled()) {
            LOG.info("parsing: " + url);
            LOG.info("contentType: " + contentType);
            LOG.info("signature: " + StringUtil.toHexString(signature));
        }
        LOG.info("---------\nUrl\n---------------\n");
        System.out.print(url + "\n");
        LOG.info("---------\nMetadata\n---------\n");
        Map<CharSequence, ByteBuffer> metadata = page.getMetadata();
        StringBuffer sb = new StringBuffer();
        if (metadata != null) {
            for (Map.Entry<CharSequence, ByteBuffer> entry : metadata.entrySet()) {
                sb.append(entry.getKey().toString()).append(" : \t").append(Bytes.toString(entry.getValue())).append("\n");
            }
            System.out.print(sb.toString());
        }
        LOG.info("---------\nOutlinks\n---------\n");
        sb = new StringBuffer();
        for (Outlink l : parse.getOutlinks()) {
            sb.append("  outlink: ").append(l).append('\n');
        }
        System.out.print(sb.toString());
        if (page.getHeaders() != null) {
            LOG.info("---------\nHeaders\n---------\n");
            Map<CharSequence, CharSequence> headers = page.getHeaders();
            StringBuffer headersb = new StringBuffer();
            if (metadata != null) {
                for (Map.Entry<CharSequence, CharSequence> entry : headers.entrySet()) {
                    headersb.append(entry.getKey().toString()).append(" : \t").append(entry.getValue()).append("\n");
                }
                System.out.print(headersb.toString());
            }
        }
        if (dumpText) {
            LOG.info("---------\nParseText\n---------\n");
            System.out.print(parse.getText());
        }
        return 0;
    }

    public Configuration getConf() {
        return this.conf;
    }

    public void setConf(Configuration c) {
        this.conf = c;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new ParserChecker(), (String[])args);
        System.exit(res);
    }
}

