CharsetMatch matches[] = detectAll();
/**
 * Detects the charsets that may have been used to encode the given stream.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream} before it is
 * handed to the ICU detector. Every match reported by ICU is converted to a
 * {@link Charset}; matches whose name the running JVM cannot resolve to a
 * {@code Charset} are skipped instead of aborting the whole detection.
 *
 * @param source the stream whose content is analysed
 * @return the set of candidate charsets; empty when no candidate could be
 *         resolved
 * @throws CharsetDetectorException if reading from {@code source} fails
 */
@Override
public Set<Charset> detect(InputStream source) throws CharsetDetectorException {
    Set<Charset> set = new HashSet<Charset>();
    com.ibm.icu.text.CharsetDetector charsetDetector = new com.ibm.icu.text.CharsetDetector();
    try {
        charsetDetector.setText(new BufferedInputStream(source));
        CharsetMatch[] charsetMatches = charsetDetector.detectAll();
        for (CharsetMatch match : charsetMatches) {
            try {
                set.add(Charset.forName(match.getName()));
            } catch (IllegalArgumentException ignored) {
                // Charset.forName signals unknown or malformed names with
                // UnsupportedCharsetException / IllegalCharsetNameException,
                // both IllegalArgumentException subclasses. ICU can report
                // names the JVM does not provide (e.g. "IBM424_rtl"); such a
                // candidate is dropped so the remaining ones are still returned.
            }
        }
    } catch (IOException e) {
        throw new CharsetDetectorException(e.getMessage(), e);
    }
    return set;
}
// Encode a short sample sentence as ISO-8859-1 and hand the raw bytes to the detector.
byte[] thisAppCanBreak = "this app can break".getBytes("ISO-8859-1");
CharsetDetector detector = new CharsetDetector();
detector.setText(thisAppCanBreak);

// One header row, then one row per candidate match, all sharing the same column layout.
String tableTemplate = "%10s %10s %8s%n";
System.out.printf(tableTemplate, "CONFIDENCE", "CHARSET", "LANGUAGE");
for (CharsetMatch candidate : detector.detectAll()) {
    int confidence = candidate.getConfidence();
    String charsetName = candidate.getName();
    String language = candidate.getLanguage();
    System.out.printf(tableTemplate, confidence, charsetName, language);
}
/**
 * Prints a table of every charset candidate detected for a file.
 *
 * <p>The file named by the first command-line argument is read through a
 * buffered stream, passed to a {@code CharsetDetector}, and each match is
 * written to standard output as a CONFIDENCE / CHARSET / LANGUAGE row.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the file to analyse
 * @throws IOException if the file cannot be opened, read, or closed
 */
public static void main(String[] args) throws IOException {
    InputStream input = new BufferedInputStream(new FileInputStream(args[0]));
    try {
        CharsetDetector icuDetector = new CharsetDetector();
        icuDetector.setText(input);

        final String rowFormat = "%10s %10s %8s%n";
        System.out.printf(rowFormat, "CONFIDENCE", "CHARSET", "LANGUAGE");

        CharsetMatch[] candidates = icuDetector.detectAll();
        for (int i = 0; i < candidates.length; i++) {
            CharsetMatch candidate = candidates[i];
            System.out.printf(rowFormat,
                    candidate.getConfidence(),
                    candidate.getName(),
                    candidate.getLanguage());
        }
    } finally {
        // Closing the buffered wrapper also closes the underlying file stream.
        input.close();
    }
}
/**
 * Detects the most likely encoding of a file.
 *
 * <p>The declared encoding is passed to the detector via
 * {@code setDeclaredEncoding}; the file content is then analysed and the
 * name of the first match returned by {@code detectAll()} is used.
 *
 * @param requiredEncoding the encoding the file is declared or expected to use
 * @param file the file to analyse
 * @param log a logger; not used by this method
 * @return the name of the first detected charset, or {@code null} when the
 *         detector produced no match at all
 * @throws IOException if the file cannot be opened or read
 */
protected String getEncoding( String requiredEncoding, File file, Log log )
    throws IOException
{
    FileInputStream fis = null;
    try
    {
        fis = new FileInputStream( file );
        CharsetDetector detector = new CharsetDetector();
        detector.setDeclaredEncoding( requiredEncoding );
        detector.setText( new BufferedInputStream( fis ) );
        CharsetMatch[] charsets = detector.detectAll();
        // An empty result must be treated like a missing one: indexing
        // charsets[0] on an empty array would throw
        // ArrayIndexOutOfBoundsException.
        if ( charsets == null || charsets.length == 0 )
        {
            return null;
        }
        return charsets[0].getName();
    }
    finally
    {
        IOUtil.close( fis );
    }
}
/**
 * Detects the most likely encoding of the content of a stream and prints
 * every candidate to standard output.
 *
 * <p>The detector needs a stream that supports {@code mark}/{@code reset}.
 * When {@code data} does not, it is wrapped in a buffered stream; in that
 * case bytes consumed during detection are not pushed back into
 * {@code data} itself.
 *
 * @param data the stream to analyse
 * @return the name of the best matching charset, or {@code null} when the
 *         detector found no match
 * @throws IOException if reading from {@code data} fails
 */
public static String getEncode(InputStream data) throws IOException {
    // Wrap only when necessary so that mark-capable streams are still
    // rewound by the detector after it has sampled them.
    InputStream markable = data.markSupported() ? data : new java.io.BufferedInputStream(data);

    CharsetDetector detector = new CharsetDetector();
    detector.setText(markable);

    // detectAll() returns the candidates sorted by decreasing confidence, so
    // its first element is the best match; running detection once avoids the
    // repeated work of calling detect() followed by detectAll().
    CharsetMatch[] matches = detector.detectAll();
    if (matches == null || matches.length == 0) {
        // No candidate at all: report "unknown" instead of failing with a
        // NullPointerException.
        return null;
    }

    String encoding = matches[0].getName();
    System.out.println("The Content in " + encoding);
    System.out.println("All possibilities");
    for (CharsetMatch m : matches) {
        System.out.println("CharsetName:" + m.getName() + " Confidence:" + m.getConfidence());
    }
    return encoding;
}
}
/**
 * Detects the most likely encoding of a byte array and prints every
 * candidate to standard output.
 *
 * @param data the raw bytes to analyse
 * @return the name of the best matching charset, or {@code null} when the
 *         detector found no match
 */
public static String getEncode(byte[] data) {
    CharsetDetector detector = new CharsetDetector();
    detector.setText(data);

    // detectAll() returns the candidates sorted by decreasing confidence, so
    // its first element is the best match; running detection once avoids the
    // repeated work of calling detect() followed by detectAll().
    CharsetMatch[] matches = detector.detectAll();
    if (matches == null || matches.length == 0) {
        // No candidate at all: report "unknown" instead of failing with a
        // NullPointerException.
        return null;
    }

    String encoding = matches[0].getName();
    System.out.println("The Content in " + encoding);
    System.out.println("All possibilities");
    for (CharsetMatch m : matches) {
        System.out.println("CharsetName:" + m.getName() + " Confidence:" + m.getConfidence());
    }
    return encoding;
}