/**
 * Resolves the encoding to use for the given {@link UnicodeInputStream}.
 *
 * @param in stream that is queried through {@link UnicodeInputStream#getDetectedEncoding()}
 * @return the encoding reported by the stream, or {@link StringPool#UTF_8}
 *         when the stream reports {@code null}
 * @see UnicodeInputStream#getDetectedEncoding()
 */
private static String detectEncoding(final UnicodeInputStream in) {
    final String detected = in.getDetectedEncoding();
    return detected != null ? detected : StringPool.UTF_8;
}
/**
 * Detects optional BOM and reads UTF string from a file.
 * If BOM is missing, UTF-8 is assumed.
 *
 * @param file file to read; must exist and must be a regular file
 * @return the file content decoded with the detected (or default UTF-8) encoding
 * @throws FileNotFoundException if {@code file} does not exist
 * @throws IOException if {@code file} is not a regular file or reading fails
 * @see UnicodeInputStream
 */
public static String readUTFString(File file) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Clamp the file length so that the narrowing cast to int cannot overflow.
    // The value is handed to the FastCharArrayWriter constructor
    // (presumably as its initial capacity - confirm against that class).
    final int initialSize = (int) Math.min(file.length(), Integer.MAX_VALUE);

    UnicodeInputStream in = null;
    try {
        in = new UnicodeInputStream(new FileInputStream(file), null);
        FastCharArrayWriter out = new FastCharArrayWriter(initialSize);
        // Shared helper: falls back to UTF-8 when no encoding was detected.
        StreamUtil.copy(in, out, detectEncoding(in));
        return out.toString();
    } finally {
        StreamUtil.close(in);
    }
}
/**
 * Reads UTF file content as char array.
 * If no encoding is detected on the stream, UTF-8 is assumed.
 *
 * @param file file to read; must exist and must be a regular file
 * @return the file content decoded with the detected (or default UTF-8) encoding
 * @throws FileNotFoundException if {@code file} does not exist
 * @throws IOException if {@code file} is not a regular file or reading fails
 * @see UnicodeInputStream
 */
public static char[] readUTFChars(File file) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Clamp the file length so that the narrowing cast to int cannot overflow.
    // The value is handed to the FastCharArrayWriter constructor
    // (presumably as its initial capacity - confirm against that class).
    final int initialSize = (int) Math.min(file.length(), Integer.MAX_VALUE);

    UnicodeInputStream in = null;
    try {
        in = new UnicodeInputStream(new FileInputStream(file), null);
        FastCharArrayWriter fastCharArrayWriter = new FastCharArrayWriter(initialSize);
        // Shared helper: falls back to UTF-8 when no encoding was detected.
        StreamUtil.copy(in, fastCharArrayWriter, detectEncoding(in));
        return fastCharArrayWriter.toCharArray();
    } finally {
        StreamUtil.close(in);
    }
}
/**
 * Detects optional BOM and reads UTF string from an input stream.
 * If BOM is missing, UTF-8 is assumed.
 * <p>
 * The given stream is wrapped in a {@link UnicodeInputStream}, and that wrapper
 * is passed to {@code StreamUtil.close} before this method returns.
 *
 * @param inputStream stream to read from
 * @return the stream content decoded with the detected (or default UTF-8) encoding
 * @throws IOException if reading from the stream fails
 */
public static String readUTFString(InputStream inputStream) throws IOException {
    UnicodeInputStream in = null;
    try {
        in = new UnicodeInputStream(inputStream, null);
        FastCharArrayWriter out = new FastCharArrayWriter();
        // Shared helper: falls back to UTF-8 when no encoding was detected.
        StreamUtil.copy(in, out, detectEncoding(in));
        return out.toString();
    } finally {
        StreamUtil.close(in);
    }
}
/**
 * Fills a 4-byte buffer from {@code 0xEFBBBF65} and expects the stream
 * to report a 3-byte BOM and the {@code UTF-8} encoding.
 */
@Test
void testUtf8() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xEFBBBF65);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(3, stream.getBOMSize());
    assertEquals("UTF-8", stream.getDetectedEncoding());
}
/**
 * Fills a 4-byte buffer from {@code 0xFEFF6565} and expects the stream
 * to report a 2-byte BOM and the {@code UTF-16BE} encoding.
 */
@Test
void testUtf16BE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xFEFF6565);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(2, stream.getBOMSize());
    assertEquals("UTF-16BE", stream.getDetectedEncoding());
}
/**
 * Fills a 4-byte buffer from {@code 0xFFFE0000} and expects the stream
 * to report a 4-byte BOM and the {@code UTF-32LE} encoding.
 */
@Test
void testUtf32LE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xFFFE0000);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(4, stream.getBOMSize());
    assertEquals("UTF-32LE", stream.getDetectedEncoding());
}
/**
 * Fills a 4-byte buffer from {@code 0xFFFE6565} and expects the stream
 * to report a 2-byte BOM and the {@code UTF-16LE} encoding.
 */
@Test
void testUtf16LE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xFFFE6565);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(2, stream.getBOMSize());
    assertEquals("UTF-16LE", stream.getDetectedEncoding());
}
/**
 * Fills a 4-byte buffer from {@code 0x0000FEFF} and expects the stream
 * to report a 4-byte BOM and the {@code UTF-32BE} encoding.
 */
@Test
void testUtf32BE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0x0000FEFF);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(4, stream.getBOMSize());
    assertEquals("UTF-32BE", stream.getDetectedEncoding());
}
/**
 * Fills a 4-byte buffer from {@code 0x11223344} and expects the stream
 * to report a BOM size of 0 and no detected encoding ({@code null}).
 */
@Test
void testNoUtf() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0x11223344);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(0, stream.getBOMSize());
    assertNull(stream.getDetectedEncoding());
}
}
/**
 * Resolves the encoding to use for the given {@link UnicodeInputStream}.
 *
 * @param in stream that is queried through {@link UnicodeInputStream#getDetectedEncoding()}
 * @return the encoding reported by the stream, or {@link StringPool#UTF_8}
 *         when the stream reports {@code null}
 * @see UnicodeInputStream#getDetectedEncoding()
 */
private static String detectEncoding(final UnicodeInputStream in) {
    final String detected = in.getDetectedEncoding();
    return detected != null ? detected : StringPool.UTF_8;
}