package com.gtis.archive.web;

import com.gtis.archive.core.environment.EnvHolder;
import com.gtis.archive.entity.Original;
import com.gtis.archive.service.OcrService;
import com.gtis.archive.service.OriginalService;
import com.gtis.archive.util.Struts2Utils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.struts2.ServletActionContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.FileCopyUtils;

import javax.servlet.http.HttpServletRequest;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Web action that drives OCR / text extraction for an archived original document.
 *
 * <p>{@link #execute()} decides how the text of the original identified by
 * {@code originalId} is obtained: from an already generated HTML file, by reading
 * the document locally through {@link OcrService}, or by submitting a task to the
 * external OCR service configured under {@code ocr.url}. {@link #getOCRResult()}
 * is the callback target whose URL is handed to that OCR service; it stores the
 * uploaded result files next to the original.
 *
 * <p>The request parameters ({@code originalId}, {@code filename}, {@code filehtml},
 * {@code filetxt}, {@code fileocrpdf}, {@code log}) are exposed through the bean
 * setters below.
 *
 * Created by Think on 2017/8/1.
 */
public class OcrAction {

    private static final Logger logger = LoggerFactory.getLogger(OcrAction.class);

    /**
     * Charset name used for URL encoding/decoding and for reading the OCR service response.
     */
    private static final String UTF8 = "UTF-8";

    /**
     * Suffix appended to the original's path to locate the generated HTML file.
     */
    private static final String HTML_SUFFIX = "_html.html";

    /**
     * Suffix appended to the original's path to locate the OCR-processed PDF.
     */
    private static final String OCR_PDF_SUFFIX = "_ocr.pdf";

    /**
     * Key of the single entry rendered in the JSON response of {@link #execute()}.
     */
    private static final String HTML_URL_KEY = "htmlUrl";

    /**
     * Response value signalling that the text was extracted locally and no OCR task was submitted.
     */
    private static final String NO_OCR_NEEDED = "noneed";

    /**
     * Matches any run of whitespace; compiled once because {@link Pattern} is immutable and reusable.
     */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    @Autowired
    private OriginalService originalService;

    @Autowired
    private OcrService ocrService;

    /**
     * Id of the original document.
     */
    private String originalId;

    /**
     * File name.
     */
    private String filename;

    /**
     * Uploaded HTML file.
     */
    private File filehtml;

    /**
     * Uploaded plain-text file.
     */
    private File filetxt;

    /**
     * Uploaded PDF that has been through OCR (optional).
     */
    private File fileocrpdf;

    /**
     * Log text sent along with the callback.
     */
    private String log;

    public String getOriginalId() {
        return originalId;
    }

    public void setOriginalId(String originalId) {
        this.originalId = originalId;
    }

    public String getFilename() {
        return filename;
    }

    public void setFilename(String filename) {
        this.filename = filename;
    }

    public File getFilehtml() {
        return filehtml;
    }

    public void setFilehtml(File filehtml) {
        this.filehtml = filehtml;
    }

    public File getFiletxt() {
        return filetxt;
    }

    public void setFiletxt(File filetxt) {
        this.filetxt = filetxt;
    }

    public File getFileocrpdf() {
        return fileocrpdf;
    }

    public void setFileocrpdf(File fileocrpdf) {
        this.fileocrpdf = fileocrpdf;
    }

    public String getLog() {
        return log;
    }

    public void setLog(String log) {
        this.log = log;
    }

    /**
     * Starts OCR recognition for the original identified by {@code originalId} and renders
     * a JSON object describing the outcome.
     *
     * <p>The rendered map contains at most one entry, {@code htmlUrl}:
     * <ul>
     *   <li>a URL of this application's {@code og!get.action} when the HTML file already exists;</li>
     *   <li>{@code "noneed"} when the text could be read locally (pdf with a text layer,
     *       txt, doc/docx, xls/xlsx) and has been saved on the original;</li>
     *   <li>a {@code QueryFile} URL on the OCR service when a task was submitted and the
     *       service response contained a file id;</li>
     *   <li>no entry at all when something failed (the failure is logged).</li>
     * </ul>
     */
    public void execute() {
        Map<String, String> map = new HashMap<String, String>();

        String ocrUrl = EnvHolder.getAppEnv().get("ocr.url");
        if (StringUtils.isBlank(ocrUrl)) {
            // Not fatal yet: the cached-HTML and local-extraction paths do not need the OCR service.
            logger.error("ocr.url没有配置");
        }

        if (StringUtils.isBlank(originalId)) {
            logger.error("原文id为空");
            Struts2Utils.renderJson(map);
            return;
        }

        Original original = originalService.getOriginal(originalId);
        if (original == null) {
            logger.error("original not found, originalId = {}", originalId);
            Struts2Utils.renderJson(map);
            return;
        }

        HttpServletRequest request = ServletActionContext.getRequest();
        // Strip the request URI from the full URL to get scheme://host[:port], then add the app context.
        String baseUrl = request.getRequestURL().toString().replace(request.getRequestURI(), "") + "/archive";
        String originalPath = EnvHolder.getAppEnv().getExpr(original.getPath());

        File html = ocrService.getHtmlFile(originalPath + HTML_SUFFIX);
        if (html != null && html.exists()) {
            map.put(HTML_URL_KEY, baseUrl + "/og!get.action?id=" + original.getId() + "&html=true");
            Struts2Utils.renderJson(map);
            return;
        }

        try {
            if (extractContentLocally(original, originalPath)) {
                map.put(HTML_URL_KEY, NO_OCR_NEEDED);
            } else if (StringUtils.isBlank(ocrUrl)) {
                logger.error("cannot submit OCR task for original {}: ocr.url is not configured", originalId);
            } else {
                String htmlUrl = submitOcrTask(ocrUrl, baseUrl, original);
                if (StringUtils.isNotBlank(htmlUrl)) {
                    map.put(HTML_URL_KEY, htmlUrl);
                }
            }
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is kept.
            logger.error("OCR processing failed for original " + originalId, e);
        }
        Struts2Utils.renderJson(map);
    }

    /**
     * Tries to obtain the text of the original without the OCR service and, on success,
     * stores it on the original and saves it.
     *
     * @param original     the original whose content should be filled in
     * @param originalPath resolved path of the original's file
     * @return {@code true} if content was read locally and saved; {@code false} if the
     *         extension is not handled locally or a pdf yielded blank text
     * @throws IOException if reading or saving fails
     */
    private boolean extractContentLocally(Original original, String originalPath) throws IOException {
        String extension = original.getExtension();
        String content;
        if ("pdf".equals(extension)) {
            content = ocrService.readPdf(originalPath);
            // Log only the size: the full text can be large and is document content.
            logger.info("pdf content length = {}", content == null ? 0 : content.length());
            if (StringUtils.isBlank(content)) {
                return false;
            }
            logger.info("不需要ocr");
        } else if ("txt".equals(extension)) {
            content = ocrService.readTxt(originalPath);
        } else if ("doc".equals(extension) || "docx".equals(extension)) {
            content = ocrService.readWord(originalPath, extension);
        } else if ("xls".equals(extension) || "xlsx".equals(extension)) {
            content = ocrService.readExcel(originalPath, extension);
        } else {
            return false;
        }
        original.setContent(content);
        originalService.saveOriginal(original);
        return true;
    }

    /**
     * Sends a {@code NewTask} GET request to the OCR service and scans the response for a file id.
     *
     * @param ocrUrl   host (and optional port) of the OCR service, without scheme
     * @param baseUrl  base URL of this application, used to build the download and callback URLs
     * @param original the original to be recognised
     * @return the {@code QueryFile} URL built from the file id found in the response, or an
     *         empty string if the response contained none
     * @throws IOException if the request fails
     */
    private String submitOcrTask(String ocrUrl, String baseUrl, Original original) throws IOException {
        String fileName = original.getName();
        String fileUrl = baseUrl + "/og!get.action?id=" + original.getId() + "&preview=false";
        String callback = baseUrl + "/ocr!getOCRResult.action?originalId=" + originalId;

        StringBuilder taskUrl = new StringBuilder("http://").append(ocrUrl)
                .append("/NewTask?fileurl=").append(URLEncoder.encode(fileUrl, UTF8))
                .append("&filename=").append(URLEncoder.encode(fileName, UTF8))
                .append("&callback=").append(URLEncoder.encode(callback, UTF8));
        if ("pdf".equals(original.getExtension())) {
            // A pdf only reaches this point when local text extraction returned nothing.
            taskUrl.append("&needocr=true");
        }
        String getURL = taskUrl.toString();
        logger.info("url = [{}]", getURL);

        HttpURLConnection connection = null;
        BufferedReader reader = null;
        try {
            // NOTE(review): no connect/read timeout is set, so a stalled OCR service blocks
            // this request indefinitely — consider configuring timeouts.
            connection = (HttpURLConnection) new URL(getURL).openConnection();
            connection.connect();
            // Decode explicitly as UTF-8, otherwise Chinese text in the response is garbled.
            reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), UTF8));
            logger.info("Contents of get request");
            String htmlUrl = "";
            String line;
            while ((line = reader.readLine()) != null) {
                String fileId = extractFileId(line);
                if (fileId != null) {
                    // Built fresh for every match so several matching lines cannot be concatenated.
                    // NOTE(review): fileName is not URL-encoded here; confirm what consumers of
                    // this URL expect before changing it.
                    htmlUrl = "http://" + ocrUrl + "/QueryFile?fileid=" + fileId + "&filename=" + fileName;
                }
            }
            logger.info("Contents of get request ends");
            return htmlUrl;
        } finally {
            IOUtils.closeQuietly(reader);
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Extracts the {@code fileid} value from a response line that mentions {@code QueryFile}.
     *
     * <p>The first query parameter (the text between the first {@code '?'} and the next
     * {@code '&'}) must be of the form {@code fileid=VALUE}.
     *
     * @param line one line of the OCR service response
     * @return the file id, or {@code null} if the line does not carry one
     */
    private static String extractFileId(String line) {
        if (!line.contains("QueryFile")) {
            return null;
        }
        int question = line.indexOf('?');
        if (question < 0) {
            return null;
        }
        // Look for '&' only after the '?', so an earlier '&' cannot produce an invalid range.
        int and = line.indexOf('&', question + 1);
        if (and < 0) {
            return null;
        }
        String parameter = line.substring(question + 1, and);
        if (!parameter.contains("fileid")) {
            return null;
        }
        String[] pair = parameter.split("=");
        // "fileid=" splits into a single element; treat a missing value as "no id".
        return pair.length > 1 ? pair[1] : null;
    }

    /**
     * Callback invoked by the OCR service once recognition has finished.
     *
     * <p>Copies the uploaded HTML file to {@code <originalPath>_html.html}, the optional
     * OCR pdf to {@code <originalPath>_ocr.pdf}, and passes the uploaded text file to
     * {@link OriginalService#saveOriginalContent}. Failures are logged; nothing is rendered.
     */
    public void getOCRResult() {
        if (StringUtils.isBlank(filename)) {
            // Only reported: the file name is not needed to store the results.
            logger.error("文件名为空");
        }

        if (StringUtils.isBlank(originalId)) {
            logger.error("原文id为空");
            return;
        }

        decodeAndLogServerLog();

        File htmlFile = getFilehtml();
        File txtFile = getFiletxt();
        File pdfFile = getFileocrpdf();
        if (htmlFile == null || txtFile == null) {
            logger.error("OCR callback for original {} is missing the html or txt upload", originalId);
            return;
        }

        Original original = originalService.getOriginal(originalId);
        if (original == null) {
            logger.error("original not found, originalId = {}", originalId);
            return;
        }
        String originalPath = EnvHolder.getAppEnv().getExpr(original.getPath());

        InputStream htmlis = null;
        InputStream txtis = null;
        InputStream pdfis = null;
        BufferedInputStream bis = null;
        BufferedOutputStream bos = null;
        BufferedInputStream pdfBis = null;
        BufferedOutputStream pdfBos = null;
        BufferedInputStream txtBis = null;
        try {
            // Open every upload first so a missing file aborts before anything is written.
            htmlis = new FileInputStream(htmlFile);
            txtis = new FileInputStream(txtFile);
            if (pdfFile != null) {
                pdfis = new FileInputStream(pdfFile);
            }

            bis = new BufferedInputStream(htmlis);
            bos = new BufferedOutputStream(new FileOutputStream(originalPath + HTML_SUFFIX));
            FileCopyUtils.copy(bis, bos);

            if (pdfis != null) {
                pdfBis = new BufferedInputStream(pdfis);
                pdfBos = new BufferedOutputStream(new FileOutputStream(originalPath + OCR_PDF_SUFFIX));
                FileCopyUtils.copy(pdfBis, pdfBos);
            }

            txtBis = new BufferedInputStream(txtis);
            originalService.saveOriginalContent(original, txtFile, txtBis);
        } catch (IOException e) {
            logger.error("failed to store OCR result for original " + originalId, e);
        } finally {
            IOUtils.closeQuietly(bos);
            IOUtils.closeQuietly(bis);
            IOUtils.closeQuietly(pdfBos);
            IOUtils.closeQuietly(pdfBis);
            IOUtils.closeQuietly(txtBis);
            IOUtils.closeQuietly(htmlis);
            IOUtils.closeQuietly(pdfis);
            IOUtils.closeQuietly(txtis);
        }
    }

    /**
     * Strips whitespace from the {@code log} parameter, decodes it (with {@code '='} used
     * in place of {@code '%'} as the escape character) and writes it to the application log.
     *
     * <p>Decoding is best-effort: a missing or malformed value must not prevent the OCR
     * result files from being stored, so failures are only reported.
     */
    private void decodeAndLogServerLog() {
        if (log != null) {
            log = replaceBlank(log);
            try {
                log = URLDecoder.decode(log.replace("=", "%").replace("%%", "%"), UTF8);
            } catch (UnsupportedEncodingException e) {
                logger.warn("could not decode server log", e);
            } catch (IllegalArgumentException e) {
                // Thrown by URLDecoder for incomplete or illegal escape sequences.
                logger.warn("could not decode server log", e);
            }
        }
        logger.info("服务端的日志为, {}", log);
    }

    /**
     * Removes all whitespace (spaces, tabs, line breaks) from a string.
     *
     * @param s the string to clean; may be {@code null}
     * @return {@code s} without any whitespace, or {@code s} itself when it is {@code null},
     *         empty or whitespace-only
     */
    private String replaceBlank(String s) {
        if (StringUtils.isNotBlank(s)) {
            return WHITESPACE.matcher(s).replaceAll("");
        }
        return s;
    }
}
