Java自动根据文件内容的编码来读取,避免乱码
2015-04-10来源:易贤网

通过cpdetector这个开源的jar包,可以自动检测文件内容的编码,从而在读取时选用正确的编码,避免乱码问题。

测试结果,提供截图:

名单

package com.zuidaima.test;

import info.monitorenter.cpdetector.io.ASCIIDetector;

import info.monitorenter.cpdetector.io.CodepageDetectorProxy;

import info.monitorenter.cpdetector.io.JChardetFacade;

import info.monitorenter.cpdetector.io.ParsingDetector;

import info.monitorenter.cpdetector.io.UnicodeDetector;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.InputStreamReader;

/**
 * Reads text files using a character encoding detected from the file itself
 * (via the cpdetector library) instead of assuming a fixed encoding.
 */
public class Main {

    /** Encoding used when detection fails or yields a charset this JVM cannot decode. */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /**
     * Reads the whole file at {@code path} as text, decoding it with the
     * detected charset (or UTF-8 when no usable charset is detected).
     *
     * @param path path of the file to read
     * @return the file's lines concatenated in order, each line followed by a
     *         single space (line terminators are not preserved); an empty
     *         string for an empty file
     * @throws Exception if the file cannot be opened or read
     */
    public static String getContent(String path) throws Exception {
        File file = new File(path);
        String charsetName = detectCharsetName(file);

        StringBuilder lines = new StringBuilder();
        // try-with-resources closes the stream and reader even when reading fails;
        // the stream is its own resource so it is released if the reader cannot be built.
        try (FileInputStream in = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, charsetName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.append(line).append(' ');
            }
        }
        return lines.toString();
    }

    /**
     * Detects the name of the charset to decode {@code file} with.
     *
     * @param file the file whose encoding should be detected
     * @return the detected charset's name, or {@code "UTF-8"} when detection
     *         throws, returns {@code null}, or returns a charset that is not
     *         supported by this JVM
     */
    private static String detectCharsetName(File file) {
        // NOTE(review): getInstance() looks like a shared instance, so these
        // detectors may be re-registered on every call - confirm against cpdetector.
        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
        detector.add(new ParsingDetector(false));
        detector.add(JChardetFacade.getInstance());
        detector.add(ASCIIDetector.getInstance());
        detector.add(UnicodeDetector.getInstance());

        java.nio.charset.Charset charset = null;
        try {
            charset = detector.detectCodepage(file.toURI().toURL());
        } catch (Exception ex) {
            // Detection is best-effort: report the failure and fall back below.
            ex.printStackTrace();
        }

        if (charset == null) {
            return FALLBACK_CHARSET;
        }
        // InputStreamReader throws UnsupportedEncodingException for a name the
        // JVM cannot decode, so only use the detected name when it is supported.
        String detectedName = charset.name();
        return java.nio.charset.Charset.isSupported(detectedName)
                ? detectedName
                : FALLBACK_CHARSET;
    }

    /**
     * Demo entry point: prints the content of a GBK-encoded and a UTF-8-encoded
     * sample file.
     *
     * @param args unused
     * @throws Exception if either sample file cannot be read
     */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("bin/gbk.txt"));
        System.out.println(getContent("bin/utf8.txt"));
    }

}

更多信息请查看IT技术专栏

2026公务员·事业单位培训课程试听报名

  • 报班类型
  • 姓名
  • 手机号
  • 验证码
推荐信息