/*
 * Java: detecting the encoding of a file
 * Posted 2014-12-16 | Category: Java
 *
 * Goal: determine the character encoding of an arbitrary file.
 *
 * First, obtain every charset supported by the running Java platform:
 *
 *     Set<String> charsetNames = Charset.availableCharsets().keySet();
 *
 * Then read a small portion of the file's bytes and try each charset in turn
 * until one of them decodes the bytes; that charset is reported as the file's
 * encoding. The code follows.
 */
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Set;

/**
 * Guesses the character encoding of a file by trial decoding.
 *
 * <p>Each candidate charset is tried in the order given; the first one whose
 * decoder accepts a chunk of the file without reporting a malformed or
 * unmappable sequence is returned. The result is therefore the first candidate
 * that <em>can</em> decode the sampled bytes, which is not necessarily the
 * encoding the file was actually written in.
 */
public class CharsetDetector {

    /** Number of bytes read from the file for each decoding attempt. */
    private static final int CHUNK_SIZE = 512;

    /**
     * Returns the first charset in {@code charsets} that decodes a chunk of {@code f}.
     *
     * @param f        the file to inspect
     * @param charsets candidate charset names, tried in array order
     * @return the first matching charset, or {@code null} if none matches, the
     *         file is empty, or the file cannot be read
     * @throws java.nio.charset.IllegalCharsetNameException if a name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException if a name is not
     *         supported by this Java platform
     */
    public Charset detectCharset(File f, String[] charsets) {
        for (String charsetName : charsets) {
            Charset charset = detectCharset(f, Charset.forName(charsetName));
            if (charset != null) {
                return charset;
            }
        }
        return null;
    }

    /**
     * Tests a single charset against the file.
     *
     * <p>The file is read in chunks of {@link #CHUNK_SIZE} bytes and the scan
     * stops at the first chunk that decodes cleanly. Each chunk is decoded on
     * its own, so a multi-byte character that straddles a chunk boundary makes
     * that chunk fail.
     *
     * @return {@code charset} if some chunk decodes cleanly; {@code null}
     *         otherwise, including when the file is empty or any exception
     *         occurs while opening, reading or decoding
     */
    private Charset detectCharset(File f, Charset charset) {
        // try-with-resources closes the stream even when read/decode throws.
        try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(f))) {
            CharsetDecoder decoder = charset.newDecoder();
            byte[] buffer = new byte[CHUNK_SIZE];
            int read;
            while ((read = input.read(buffer)) != -1) {
                if (identify(buffer, read, decoder)) {
                    return charset;
                }
            }
            return null;
        } catch (Exception e) {
            // Best effort: an unreadable file or unusable decoder simply means "no match".
            return null;
        }
    }

    /**
     * Reports whether the first {@code length} bytes of {@code bytes} decode
     * without error.
     *
     * <p>Only the bytes actually read are decoded; the rest of the buffer may
     * hold zeros or leftovers from a previous chunk and must not influence the
     * verdict.
     */
    private boolean identify(byte[] bytes, int length, CharsetDecoder decoder) {
        try {
            // decode(ByteBuffer) resets the decoder and treats the input as complete.
            decoder.decode(ByteBuffer.wrap(bytes, 0, length));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        }
    }

    /**
     * Demo entry point: detects the charset of a file and prints its name.
     *
     * @param args optional; {@code args[0]} is the path of the file to inspect.
     *             Without arguments the original sample path is used.
     */
    public static void main(String[] args) {
        try {
            String path = args.length > 0 ? args[0] : "/Users/YI/Desktop/audio.pcm";
            File f = new File(path);

            Set<String> charsetNames = Charset.availableCharsets().keySet();
            String[] charsetsToBeTested = charsetNames.toArray(new String[charsetNames.size()]);

            CharsetDetector cd = new CharsetDetector();
            Charset charset = cd.detectCharset(f, charsetsToBeTested);

            if (charset != null) {
                System.out.println(charset.toString());
                // The file is read through and the bytes discarded, as in the
                // original post; presumably a placeholder for real processing.
                try (FileInputStream is = new FileInputStream(f)) {
                    byte[] by = new byte[1024];
                    while (is.read(by) != -1) {
                        // nothing to do with the data here
                    }
                }
            } else {
                System.out.println("Unrecognized charset.");
            }
        } catch (Exception e) {
            // Report the failure instead of silently swallowing it.
            System.err.println("Charset detection failed: " + e);
        }
    }
}

/*
 * You are welcome to scan the WeChat official-account QR code above and
 * subscribe to my blog!
 */