Java-获取文件的编码

获取任意文件的编码格式。

首先,需要获取当前 Java 运行环境支持的所有字符集(Charset)的名称:

1
Set<String> charsetNames = Charset.availableCharsets().keySet();

然后读取文件的一小部分字节,用每个候选字符集逐一尝试解码,第一个能够无错误解码的字符集即作为检测结果。注意:这只是一种启发式判断,结果取决于候选字符集的顺序,并不保证就是文件的真实编码。代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.util.Set;
/**
 * Heuristic charset detector: samples the leading bytes of a file and reports
 * the first candidate charset whose decoder accepts that sample without a
 * malformed-input or unmappable-character error.
 *
 * <p>This is a best-effort guess, not a proof. Candidates are tried in the
 * order given, so a permissive charset (for example a single-byte charset that
 * accepts every byte value) placed early in the list will win over a more
 * specific one placed later.
 */
public class CharsetDetector {

    /** Number of leading bytes of the file that are sampled for detection. */
    private static final int SAMPLE_SIZE = 512;

    /** Size of the scratch buffer that receives (and discards) decoded characters. */
    private static final int SCRATCH_CHARS = 1024;

    /**
     * Returns the first charset in {@code charsets} that can decode the leading
     * {@value #SAMPLE_SIZE} bytes of {@code f} without error.
     *
     * @param f        the file to inspect
     * @param charsets candidate charset names, tried in array order
     * @return the first matching charset, or {@code null} if the file is empty,
     *         cannot be read, or no candidate decodes the sample
     * @throws java.nio.charset.IllegalCharsetNameException if a candidate name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException if a candidate is not supported
     */
    public Charset detectCharset(File f, String[] charsets) {
        // One extra byte is requested so we can tell whether the file continues
        // past the sample, i.e. whether the sample may end in the middle of a
        // multi-byte character.
        byte[] buffer = new byte[SAMPLE_SIZE + 1];
        int read;
        try {
            read = readLeadingBytes(f, buffer);
        } catch (IOException e) {
            // Unreadable or missing file: nothing can be detected.
            return null;
        }
        if (read == 0) {
            // An empty file carries no evidence for any charset.
            return null;
        }

        boolean truncated = read > SAMPLE_SIZE;
        int length = Math.min(read, SAMPLE_SIZE);

        for (String charsetName : charsets) {
            Charset candidate = Charset.forName(charsetName);
            if (canDecode(candidate, buffer, length, !truncated)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Fills {@code buffer} from the start of {@code f}, stopping at end of file.
     *
     * @return the number of bytes actually stored in {@code buffer}
     * @throws IOException if the file cannot be opened or read
     */
    private static int readLeadingBytes(File f, byte[] buffer) throws IOException {
        try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(f))) {
            int total = 0;
            while (total < buffer.length) {
                int n = input.read(buffer, total, buffer.length - total);
                if (n == -1) {
                    break;
                }
                total += n;
            }
            return total;
        }
    }

    /**
     * Tests whether the first {@code length} bytes of {@code bytes} are valid in
     * {@code charset}.
     *
     * @param endOfInput {@code true} if the bytes are the complete file content;
     *                   {@code false} if the file continues, in which case an
     *                   incomplete trailing multi-byte sequence is tolerated
     */
    private static boolean canDecode(Charset charset, byte[] bytes, int length, boolean endOfInput) {
        // A fresh decoder reports malformed/unmappable input as an error result.
        CharsetDecoder decoder = charset.newDecoder();
        ByteBuffer in = ByteBuffer.wrap(bytes, 0, length);
        CharBuffer out = CharBuffer.allocate(SCRATCH_CHARS);

        while (true) {
            CoderResult result = decoder.decode(in, out, endOfInput);
            if (result.isError()) {
                return false;
            }
            if (result.isUnderflow()) {
                break;
            }
            // Overflow: the decoded characters are not needed, so make room and continue.
            out.clear();
        }

        if (!endOfInput) {
            // Bytes left in 'in' are a valid prefix of a character that continues
            // beyond the sample; flush() must not be called in this state.
            return true;
        }

        while (true) {
            CoderResult result = decoder.flush(out);
            if (result.isError()) {
                return false;
            }
            if (result.isUnderflow()) {
                return true;
            }
            out.clear();
        }
    }

    /**
     * Command-line entry point: prints the detected charset of a file, or
     * {@code "Unrecognized charset."} when none of the JVM's available charsets
     * matches.
     *
     * @param args optional; {@code args[0]} is the path of the file to inspect.
     *             When absent, the original sample path is used.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "/Users/YI/Desktop/audio.pcm";
        File f = new File(path);

        Set<String> charsetNames = Charset.availableCharsets().keySet();
        String[] charsetsToBeTested = charsetNames.toArray(new String[0]);

        CharsetDetector cd = new CharsetDetector();
        Charset charset = cd.detectCharset(f, charsetsToBeTested);
        if (charset != null) {
            System.out.println(charset.toString());
        } else {
            System.out.println("Unrecognized charset.");
        }
    }
}
YI wechat
欢迎您扫一扫上面的微信公众号,订阅我的博客!