/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer.solr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.indexer.solr.SolrUtils;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.SolrParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SolrDeleteDuplicates
extends Reducer<Text, SolrRecord, Text, SolrRecord>
implements Tool {
    /** Logger used by the job driver methods of this class. */
    public static final Logger LOG = LoggerFactory.getLogger(SolrDeleteDuplicates.class);
    /** Range query on the {@code id} field; the input format issues it to count and page through documents. */
    private static final String SOLR_GET_ALL_QUERY = "id:[* TO *]";
    /** Maximum number of delete-by-id requests to buffer before they are sent to Solr. */
    private static final int NUM_MAX_DELETE_REQUEST = 1000;
    /** Configuration stored by {@code setConf} and returned by {@code getConf}. */
    private Configuration conf;
    /** Solr client used by the reducer; assigned in {@code setup}. */
    private SolrServer solr;
    /** Number of deletes currently buffered in {@link #updateRequest}. */
    private int numDeletes = 0;
    /** Pending batch of delete-by-id requests; replaced with a fresh request after each flush. */
    private UpdateRequest updateRequest = new UpdateRequest();

    /**
     * Returns the configuration previously handed to {@code setConf}.
     *
     * @return the stored configuration, or {@code null} when none has been set yet
     */
    public Configuration getConf() {
        return conf;
    }

    /**
     * Stores the configuration this tool should use.
     *
     * @param conf configuration to keep; it is returned unchanged by {@code getConf()}
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Prepares the reducer by building the Solr client from the task's configuration.
     *
     * @param job reducer context; only its configuration is read
     * @throws IOException propagated if client creation in {@code SolrUtils} raises one
     */
    public void setup(Reducer.Context job) throws IOException {
        this.solr = SolrUtils.getHttpSolrServer(job.getConfiguration());
    }

    /**
     * Sends any deletes that are still buffered and then commits the Solr index.
     *
     * <p>The commit is issued unconditionally. {@code reduce} sends full batches without
     * committing and resets {@code numDeletes} to zero, so when the reducer finishes exactly on
     * a batch boundary the counter is zero even though deletes were sent. Committing only when
     * {@code numDeletes > 0} would leave those deletes uncommitted.
     *
     * @param context reducer context (unused)
     * @throws IOException wrapping any {@link SolrServerException} raised by Solr, or propagated
     *         from the underlying transport
     */
    @Override
    public void cleanup(Context context) throws IOException {
        try {
            if (this.numDeletes > 0) {
                this.updateRequest.process(this.solr);
            }
            // Always commit: earlier batches flushed from reduce() were processed but not committed.
            this.solr.commit();
        } catch (SolrServerException e) {
            throw new IOException(e);
        }
    }

    /**
     * Resolves one group of records that share the same key (the content digest emitted by the
     * record reader): keeps the preferred record and queues a delete-by-id for every other one.
     *
     * <p>A record is preferred over the current keeper when its boost is higher, or when the
     * boosts are equal and its timestamp is newer. Queued deletes are sent to Solr in batches of
     * {@code NUM_MAX_DELETE_REQUEST}; batches are sent without committing.
     *
     * <p>The parameter is declared as the inherited, parameterized {@code Context} so that this
     * method really overrides {@code Reducer.reduce}; {@code @Override} makes the compiler
     * enforce that.
     *
     * @param key digest shared by all {@code values}
     * @param values records carrying that digest
     * @param context reducer context (unused; nothing is emitted)
     * @throws IOException wrapping any {@link SolrServerException} raised while sending a batch
     */
    @Override
    public void reduce(Text key, Iterable<SolrRecord> values, Context context) throws IOException {
        Iterator<SolrRecord> iterator = values.iterator();
        if (!iterator.hasNext()) {
            return;
        }
        // Hadoop hands back the SAME value instance on every next() call and merely refills it.
        // Holding a bare reference would make the keeper alias the current record, so the
        // comparison below could never be true. Keep an independent snapshot instead.
        SolrRecord recordToKeep = copyOf(iterator.next());
        while (iterator.hasNext()) {
            SolrRecord candidate = iterator.next();
            if (isPreferred(candidate, recordToKeep)) {
                this.updateRequest.deleteById(recordToKeep.getId());
                recordToKeep = copyOf(candidate);
            } else {
                this.updateRequest.deleteById(candidate.getId());
            }
            ++this.numDeletes;
            if (this.numDeletes >= NUM_MAX_DELETE_REQUEST) {
                flushDeletes();
            }
        }
    }

    /** Returns an independent copy of {@code record} that survives reuse of the original instance. */
    private static SolrRecord copyOf(SolrRecord record) {
        return new SolrRecord(record.getId(), record.getBoost(), record.getTstamp());
    }

    /** Tells whether {@code candidate} wins over {@code current}: higher boost, or equal boost and newer timestamp. */
    private static boolean isPreferred(SolrRecord candidate, SolrRecord current) {
        return candidate.getBoost() > current.getBoost()
            || (candidate.getBoost() == current.getBoost() && candidate.getTstamp() > current.getTstamp());
    }

    /** Sends the buffered deletes to Solr (without committing) and starts a new, empty batch. */
    private void flushDeletes() throws IOException {
        try {
            this.updateRequest.process(this.solr);
        } catch (SolrServerException e) {
            throw new IOException(e);
        }
        this.updateRequest = new UpdateRequest();
        this.numDeletes = 0;
    }

    /**
     * Configures and runs the "solrdedup" MapReduce job against the given Solr instance.
     *
     * <p>The URL is written to the {@code solr.server.url} configuration key. Input comes from
     * {@link SolrInputFormat}, the stock Hadoop {@code Mapper} is used as the mapper, this class
     * is the reducer, and job output is discarded through {@code NullOutputFormat}.
     *
     * @param solrUrl URL of the Solr server to deduplicate
     * @return the result of {@code Job.waitForCompletion(true)}
     * @throws IOException propagated from job setup or execution
     * @throws InterruptedException propagated from waiting on the job
     * @throws ClassNotFoundException propagated from job submission
     */
    public boolean dedup(String solrUrl) throws IOException, InterruptedException, ClassNotFoundException {
        LOG.info("SolrDeleteDuplicates: starting...");
        LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);

        Configuration jobConf = this.getConf();
        jobConf.set("solr.server.url", solrUrl);

        Job dedupJob = new Job(jobConf, "solrdedup");
        // Processing stages.
        dedupJob.setMapperClass(Mapper.class);
        dedupJob.setReducerClass(SolrDeleteDuplicates.class);
        // Intermediate key/value types exchanged between the stages.
        dedupJob.setMapOutputKeyClass(Text.class);
        dedupJob.setMapOutputValueClass(SolrRecord.class);
        // Where records come from and where (nowhere) results go.
        dedupJob.setInputFormatClass(SolrInputFormat.class);
        dedupJob.setOutputFormatClass(NullOutputFormat.class);

        boolean succeeded = dedupJob.waitForCompletion(true);
        return succeeded;
    }

    /**
     * Command-line entry point used by {@code ToolRunner}.
     *
     * @param args exactly one element: the Solr URL
     * @return {@code 1} when the argument count is wrong (after printing usage to stderr),
     *         {@code 0} when deduplication succeeded, {@code -1} when it did not
     * @throws IOException propagated from {@code dedup}
     * @throws InterruptedException propagated from {@code dedup}
     * @throws ClassNotFoundException propagated from {@code dedup}
     */
    public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        if (args.length != 1) {
            System.err.println("Usage: SolrDeleteDuplicates <solr url>");
            return 1;
        }
        if (!this.dedup(args[0])) {
            return -1;
        }
        LOG.info("SolrDeleteDuplicates: done.");
        return 0;
    }

    /**
     * Runs the tool through {@code ToolRunner} with a Nutch configuration and exits the JVM
     * with the tool's return code.
     *
     * @param args command-line arguments, forwarded unchanged
     * @throws Exception anything raised by {@code ToolRunner.run}
     */
    public static void main(String[] args) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        Tool dedupTool = new SolrDeleteDuplicates();
        int exitCode = ToolRunner.run(nutchConf, dedupTool, args);
        System.exit(exitCode);
    }

    public static class SolrInputFormat
    extends InputFormat<Text, SolrRecord> {
        public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
            QueryResponse response;
            Configuration conf = context.getConfiguration();
            int numSplits = context.getNumReduceTasks();
            HttpSolrServer solr = SolrUtils.getHttpSolrServer(conf);
            SolrQuery solrQuery = new SolrQuery(SolrDeleteDuplicates.SOLR_GET_ALL_QUERY);
            solrQuery.setFields(new String[]{"id"});
            solrQuery.setRows(Integer.valueOf(1));
            try {
                response = solr.query((SolrParams)solrQuery);
            }
            catch (SolrServerException e) {
                throw new IOException(e);
            }
            int numResults = (int)response.getResults().getNumFound();
            int numDocsPerSplit = numResults / numSplits;
            int currentDoc = 0;
            ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
            for (int i = 0; i < numSplits - 1; ++i) {
                splits.add(new SolrInputSplit(currentDoc, numDocsPerSplit));
                currentDoc += numDocsPerSplit;
            }
            splits.add(new SolrInputSplit(currentDoc, numResults - currentDoc));
            return splits;
        }

        public RecordReader<Text, SolrRecord> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            QueryResponse response;
            Configuration conf = context.getConfiguration();
            HttpSolrServer solr = SolrUtils.getHttpSolrServer(conf);
            SolrInputSplit solrSplit = (SolrInputSplit)split;
            int numDocs = (int)solrSplit.getLength();
            SolrQuery solrQuery = new SolrQuery(SolrDeleteDuplicates.SOLR_GET_ALL_QUERY);
            solrQuery.setFields(new String[]{"id", "boost", "tstamp", "digest"});
            solrQuery.setStart(Integer.valueOf(solrSplit.getDocBegin()));
            solrQuery.setRows(Integer.valueOf(numDocs));
            try {
                response = solr.query((SolrParams)solrQuery);
            }
            catch (SolrServerException e) {
                throw new IOException(e);
            }
            SolrDocumentList solrDocs = response.getResults();
            return new SolrRecordReader(solrDocs, numDocs);
        }
    }

    public static class SolrRecordReader
    extends RecordReader<Text, SolrRecord> {
        private int currentDoc = 0;
        private int numDocs;
        private Text text;
        private SolrRecord record;
        private SolrDocumentList solrDocs;

        public SolrRecordReader(SolrDocumentList solrDocs, int numDocs) {
            this.solrDocs = solrDocs;
            this.numDocs = numDocs;
        }

        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            this.text = new Text();
            this.record = new SolrRecord();
        }

        public void close() throws IOException {
        }

        public float getProgress() throws IOException {
            return (float)this.currentDoc / (float)this.numDocs;
        }

        public Text getCurrentKey() throws IOException, InterruptedException {
            return this.text;
        }

        public SolrRecord getCurrentValue() throws IOException, InterruptedException {
            return this.record;
        }

        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (this.currentDoc >= this.numDocs) {
                return false;
            }
            SolrDocument doc = (SolrDocument)this.solrDocs.get(this.currentDoc);
            String digest = (String)doc.getFieldValue("digest");
            this.text.set(digest);
            this.record.readSolrDocument(doc);
            ++this.currentDoc;
            return true;
        }
    }

    /**
     * Input split describing a contiguous window of Solr documents by start offset and length.
     * Serialized as two ints: the start offset followed by the document count.
     */
    public static class SolrInputSplit
    extends InputSplit
    implements Writable {
        /** Offset of the first document in the window. */
        private int firstDoc;
        /** Number of documents in the window. */
        private int docCount;

        /** No-argument constructor; the fields are filled in later by {@code readFields}. */
        public SolrInputSplit() {
        }

        /**
         * @param docBegin offset of the first document
         * @param numDocs number of documents in the window
         */
        public SolrInputSplit(int docBegin, int numDocs) {
            firstDoc = docBegin;
            docCount = numDocs;
        }

        /** @return offset of the first document in the window */
        public int getDocBegin() {
            return firstDoc;
        }

        /** @return number of documents in the window */
        public long getLength() throws IOException {
            return (long) docCount;
        }

        /** @return an empty array; the split declares no locations */
        public String[] getLocations() throws IOException {
            return new String[]{};
        }

        /** Reads the start offset and then the document count, mirroring {@code write}. */
        public void readFields(DataInput in) throws IOException {
            firstDoc = in.readInt();
            docCount = in.readInt();
        }

        /** Writes the start offset and then the document count. */
        public void write(DataOutput out) throws IOException {
            out.writeInt(firstDoc);
            out.writeInt(docCount);
        }
    }

    public static class SolrRecord
    implements Writable {
        private float boost;
        private long tstamp;
        private String id;

        public SolrRecord() {
        }

        public SolrRecord(String id, float boost, long tstamp) {
            this.id = id;
            this.boost = boost;
            this.tstamp = tstamp;
        }

        public String getId() {
            return this.id;
        }

        public float getBoost() {
            return this.boost;
        }

        public long getTstamp() {
            return this.tstamp;
        }

        public void readSolrDocument(SolrDocument doc) {
            this.id = (String)doc.getFieldValue("id");
            this.boost = ((Float)doc.getFieldValue("boost")).floatValue();
            Date buffer = (Date)doc.getFieldValue("tstamp");
            this.tstamp = buffer.getTime();
        }

        public void readFields(DataInput in) throws IOException {
            this.id = Text.readString((DataInput)in);
            this.boost = in.readFloat();
            this.tstamp = in.readLong();
        }

        public void write(DataOutput out) throws IOException {
            Text.writeString((DataOutput)out, (String)this.id);
            out.writeFloat(this.boost);
            out.writeLong(this.tstamp);
        }
    }
}

