package cn.gtmap.crawler.news.core;

import cn.gtmap.crawler.news.ArticleSave;
import cn.gtmap.crawler.news.model.Article;
import com.gtis.common.util.UUIDGenerator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.hadoop.metrics2.sink.ganglia.AbstractGangliaSink;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.solr.response.RawResponseWriter;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

/* loaded from: input_file:WEB-INF/classes/cn/gtmap/crawler/news/core/ResultCrawlerProcessor.class */
public class ResultCrawlerProcessor extends BaseLogger implements PageProcessor {
    public static final String TDDJ_CHN = "tddj";
    public static final String TDCR_CHN = "tdcr";
    private Site site = Site.me().setRetryTimes(3).setSleepTime(5000);

    @Override // us.codecraft.webmagic.processor.PageProcessor
    public void process(Page page) {
        Selectable xpath = page.getHtml().xpath("//div[@id='cx1']");
        Selectable xpath2 = page.getHtml().xpath("//div[@class='page']");
        String str = page.getUrl().get();
        int parseInt = Integer.parseInt(xpath2.xpath("//em[2]/text()").get());
        if (str.indexOf("?page") >= 0) {
            getInformation(xpath, str);
            return;
        }
        ArrayList arrayList = new ArrayList();
        for (int i = 1; i <= parseInt; i++) {
            arrayList.add(str.concat("?page=") + String.valueOf(i));
        }
        page.addTargetRequests(arrayList);
    }

    @Override // us.codecraft.webmagic.processor.PageProcessor
    public Site getSite() {
        return this.site;
    }

    private void getInformation(Selectable selectable, String str) {
        List<Selectable> nodes = selectable.xpath("//tr").nodes();
        for (int i = 1; i < nodes.size(); i++) {
            Selectable selectable2 = nodes.get(i);
            List<Selectable> nodes2 = selectable2.xpath("//td").nodes();
            for (int i2 = 0; i2 < nodes2.size(); i2++) {
                if (i2 == 0) {
                    String str2 = nodes2.get(0).links().get().split(AbstractGangliaSink.EQUAL)[1];
                    String channelId = getChannelId(str);
                    Article article = new Article();
                    article.set("id", UUIDGenerator.generate());
                    article.set("channel", channelId);
                    article.set("title", str2);
                    HashMap hashMap = new HashMap();
                    if ("tddj".equals(channelId)) {
                        hashMap.put("bdcqzh", nodes2.get(0).xpath("//a/text()").get());
                        hashMap.put("bdcqlr", selectable2.xpath("//td[2]/text()").get());
                        hashMap.put("bdczl", selectable2.xpath("//td[3]/text()").get());
                        hashMap.put("bdcmj", selectable2.xpath("//td[4]/text()").get());
                    } else {
                        hashMap.put("cjdw", selectable2.xpath("//td[2]/text()").get());
                        hashMap.put("crmj", selectable2.xpath("//td[3]/text()").get());
                        hashMap.put("cjjg", selectable2.xpath("//td[4]/text()").get());
                        hashMap.put("cjsj", selectable2.xpath("//td[5]/text()").get());
                    }
                    article.set(RawResponseWriter.CONTENT, selectable2.toString().replace("href", WikipediaTokenizer.HEADING));
                    try {
                        ArticleSave.getArticleSave().saveArticle(article);
                    } catch (Exception e) {
                        this.logger.error(e.getLocalizedMessage());
                    }
                }
            }
        }
    }

    private String getChannelId(String str) {
        return str.split("web/")[1].split("_")[0].split("Search")[1].toLowerCase();
    }
}
