/*
 * Author: xyang
 *
 * Project: fileCenter
 *
 * File: CharsetDetector.java
 *
 * LastModified: 2009-10-30 10:16:01
 *
 * Copyright (c) 2009 gtis. All Rights Reserved.
 *
 * Copying of this document or code and giving it to others and the
 * use or communication of the contents thereof, are forbidden without
 * expressed authority. Offenders are liable to the payment of damages.
 * All rights reserved in the event of the grant of a invention patent or the
 * registration of a utility model, design or code.
 *
 * Issued by gtis Ltd.
 */

package com.gtis.fileCenter;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
import org.mozilla.intl.chardet.nsPSMDetector;

import java.io.*;

/**
 * Guesses the character encoding of a byte stream or file by feeding its
 * content to the jchardet detector ({@code org.mozilla.intl.chardet}).
 * <p/>
 * All entry points are static; each call works on its own private instance,
 * so calls do not share detection state. Detection is best-effort: a failure
 * while scanning the content does not raise an exception, it only degrades the
 * quality of the answer (see {@link #detect(InputStream, int)}).
 *
 * @author <a href="mailto:oxsean@gmail.com">sean yang</a>
 * @version V1.0, 2009-10-30
 */
public class CharsetDetector {

    /** Encoding reported when the detector cannot offer a usable candidate. */
    public static final String DEFAULT_ENCODING = "gbk";

    /**
     * Placeholder that jchardet's {@code getProbableCharsets()} reports instead
     * of an empty array once every candidate has been ruled out. It is not a
     * charset name and must never be handed back to callers.
     */
    private static final String NO_MATCH = "nomatch";

    /** Set by the detection observer once the detector reports a definite charset. */
    private boolean found = false;

    /** Charset reported by the observer, or the fallback chosen after scanning. */
    private String encoding = DEFAULT_ENCODING;

    /**
     * Detects the encoding of the given stream without a language hint
     * (uses {@code nsPSMDetector.NO_OF_LANGUAGES} as the hint value).
     *
     * @param is stream to inspect; it is closed before this method returns
     * @return the detected charset name, {@code "ASCII"} for pure ASCII content,
     *         or {@link #DEFAULT_ENCODING} when nothing usable was detected
     * @throws IOException if closing the stream fails
     */
    public static String detect(InputStream is) throws IOException {
        return detect(is, nsPSMDetector.NO_OF_LANGUAGES);
    }

    /**
     * Detects the encoding of the given stream.
     * <p/>
     * Errors raised while reading or analysing the content are deliberately
     * not propagated; the method then answers with whatever the detector had
     * worked out so far, or with {@link #DEFAULT_ENCODING}.
     *
     * @param is           stream to inspect; it is closed before this method returns
     * @param languageHint language hint passed to the {@code nsDetector} constructor
     * @return the detected charset name, {@code "ASCII"} for pure ASCII content,
     *         or {@link #DEFAULT_ENCODING} when nothing usable was detected
     * @throws IOException if closing the stream fails
     */
    public static String detect(InputStream is, int languageHint) throws IOException {
        return new CharsetDetector().detectEncoding(is, languageHint);
    }

    /**
     * Detects the encoding of the given file without a language hint
     * (uses {@code nsPSMDetector.NO_OF_LANGUAGES} as the hint value).
     *
     * @param file file to inspect
     * @return the detected charset name, {@code "ASCII"} for pure ASCII content,
     *         or {@link #DEFAULT_ENCODING} when nothing usable was detected
     * @throws IOException if the file cannot be opened or closing it fails
     */
    public static String detect(File file) throws IOException {
        return detect(file, nsPSMDetector.NO_OF_LANGUAGES);
    }

    /**
     * Detects the encoding of the given file.
     *
     * @param file         file to inspect
     * @param languageHint language hint passed to the {@code nsDetector} constructor
     * @return the detected charset name, {@code "ASCII"} for pure ASCII content,
     *         or {@link #DEFAULT_ENCODING} when nothing usable was detected
     * @throws IOException if the file cannot be opened or closing it fails
     */
    public static String detect(File file, int languageHint) throws IOException {
        // The stream opened here is closed by detectEncoding() on every path.
        return detect(new FileInputStream(file), languageHint);
    }

    /**
     * Runs one detection pass over {@code is} and closes it afterwards.
     *
     * @param is           stream to scan
     * @param languageHint language hint passed to the {@code nsDetector} constructor
     * @return the charset name chosen for the content
     * @throws IOException if closing the stream fails
     */
    private String detectEncoding(InputStream is, int languageHint) throws IOException {
        BufferedInputStream bis = new BufferedInputStream(is);
        boolean isAscii = true;
        // True once the read loop has ended normally (end of stream or detector verdict).
        boolean scanCompleted = false;
        nsDetector det;

        try {
            // Built inside the try so the stream is closed even if detector setup fails.
            det = new nsDetector(languageHint);
            det.Init(new nsICharsetDetectionObserver() {
                public void Notify(String charset) {
                    found = true;
                    encoding = charset;
                }
            });

            try {
                byte[] buf = new byte[1024];
                boolean done = false;
                int len;
                // Once the detector says it is done nothing else is fed to it,
                // so there is no point in reading the rest of the stream.
                while (!done && (len = bis.read(buf, 0, buf.length)) != -1) {
                    if (isAscii) {
                        isAscii = det.isAscii(buf, len);
                    }
                    if (!isAscii) {
                        done = det.DoIt(buf, len, false);
                    }
                }
                scanCompleted = true;
                det.DataEnd();
            } catch (Exception ignored) {
                // Best-effort detection: a failure while scanning must not fail the
                // caller. The fallbacks below pick the answer from what was seen so far.
            }
        } finally {
            bis.close();
        }

        if (isAscii) {
            if (!scanCompleted) {
                // Reading broke off before the content was fully checked, so "ASCII"
                // would be an unfounded claim and the detector was never fed anything.
                return DEFAULT_ENCODING;
            }
            encoding = "ASCII";
            found = true;
        }
        if (!found) {
            // Nothing definite was reported: fall back to the first probable charset,
            // unless the detector only offers its "nomatch" placeholder.
            String[] prob = det.getProbableCharsets();
            if (prob != null && prob.length > 0 && prob[0] != null && !NO_MATCH.equals(prob[0])) {
                encoding = prob[0];
            } else {
                return DEFAULT_ENCODING;
            }
        }
        return encoding;
    }
}

