/**
 * Factory helper that wraps an {@link InputStream} in a new {@link UnicodeInputStream}.
 *
 * @param input stream to wrap
 * @param targetEncoding encoding handed to the {@link UnicodeInputStream} constructor
 * @return the newly created {@link UnicodeInputStream}
 */
private static UnicodeInputStream unicodeInputStreamOf(final InputStream input, final String targetEncoding) {
    final UnicodeInputStream wrapped = new UnicodeInputStream(input, targetEncoding);
    return wrapped;
}
/**
 * Reads a file line by line and returns the lines as an array.
 * When the encoding name starts with {@code "UTF"}, the file stream is first wrapped
 * in a {@link UnicodeInputStream} created for that encoding.
 *
 * @param file file to read; must exist and be a regular file
 * @param encoding name of the encoding used to decode the file
 * @return all lines of the file, in reading order
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if the path is not a regular file or an I/O error propagates from reading
 */
public static String[] readLines(File file, String encoding) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    final List<String> lines = new ArrayList<>();
    InputStream source = null;
    try {
        source = new FileInputStream(file);
        if (encoding.startsWith("UTF")) {
            source = new UnicodeInputStream(source, encoding);
        }
        final BufferedReader reader = new BufferedReader(new InputStreamReader(source, encoding));
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            lines.add(line);
        }
    } finally {
        // Closing the underlying stream is delegated to StreamUtil.
        StreamUtil.close(source);
    }

    final String[] result = new String[lines.size()];
    return lines.toArray(result);
}
/**
 * Loads the entire content of a file into a string, decoding it with the given encoding.
 * When the encoding name starts with {@code "UTF"}, the file stream is wrapped in a
 * {@link UnicodeInputStream} created for that encoding before it is decoded.
 *
 * @param file file to read; must exist and be a regular file
 * @param encoding name of the encoding used for decoding
 * @return decoded file content
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if the path is not a regular file or an I/O error propagates from reading
 */
public static String readString(File file, String encoding) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Value handed to the writer's constructor: file length in bytes, capped at Integer.MAX_VALUE.
    final int capacity = (int) Math.min(file.length(), (long) Integer.MAX_VALUE);

    InputStream source = null;
    try {
        source = new FileInputStream(file);
        if (encoding.startsWith("UTF")) {
            source = new UnicodeInputStream(source, encoding);
        }
        final FastCharArrayWriter content = new FastCharArrayWriter(capacity);
        StreamUtil.copy(source, content, encoding);
        return content.toString();
    } finally {
        StreamUtil.close(source);
    }
}
/**
 * Loads the entire content of a file into a char array, decoding it with the given encoding.
 * When the encoding name starts with {@code "UTF"}, the file stream is wrapped in a
 * {@link UnicodeInputStream} created for that encoding before it is decoded.
 *
 * @param file file to read; must exist and be a regular file
 * @param encoding name of the encoding used for decoding
 * @return decoded file content as characters
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if the path is not a regular file or an I/O error propagates from reading
 */
public static char[] readChars(File file, String encoding) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Value handed to the writer's constructor: file length in bytes, capped at Integer.MAX_VALUE.
    final long byteCount = file.length();
    final int capacity = byteCount >= Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) byteCount;

    InputStream source = null;
    try {
        source = new FileInputStream(file);
        if (encoding.startsWith("UTF")) {
            source = new UnicodeInputStream(source, encoding);
        }
        final FastCharArrayWriter chars = new FastCharArrayWriter(capacity);
        StreamUtil.copy(source, chars, encoding);
        return chars.toCharArray();
    } finally {
        StreamUtil.close(source);
    }
}
/**
 * Loads a UTF file into a char array. The file is read through a
 * {@link UnicodeInputStream}; the encoding it reports is used for decoding,
 * and {@code StringPool.UTF_8} is used when it reports none.
 *
 * @param file file to read; must exist and be a regular file
 * @return decoded file content as characters
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if the path is not a regular file or an I/O error propagates from reading
 * @see UnicodeInputStream
 */
public static char[] readUTFChars(File file) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Value handed to the writer's constructor: file length in bytes, capped at Integer.MAX_VALUE.
    final int capacity = (int) Math.min(file.length(), (long) Integer.MAX_VALUE);

    UnicodeInputStream unicodeIn = null;
    try {
        unicodeIn = new UnicodeInputStream(new FileInputStream(file), null);
        final FastCharArrayWriter chars = new FastCharArrayWriter(capacity);

        final String detected = unicodeIn.getDetectedEncoding();
        final String charsetName = (detected != null) ? detected : StringPool.UTF_8;

        StreamUtil.copy(unicodeIn, chars, charsetName);
        return chars.toCharArray();
    } finally {
        StreamUtil.close(unicodeIn);
    }
}
/**
 * Loads a UTF file into a string. The file is read through a
 * {@link UnicodeInputStream}; the encoding it reports is used for decoding,
 * and {@code StringPool.UTF_8} is used when it reports none.
 *
 * @param file file to read; must exist and be a regular file
 * @return decoded file content
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException if the path is not a regular file or an I/O error propagates from reading
 * @see UnicodeInputStream
 */
public static String readUTFString(File file) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Value handed to the writer's constructor: file length in bytes, capped at Integer.MAX_VALUE.
    final long byteCount = file.length();
    final int capacity = byteCount >= Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) byteCount;

    UnicodeInputStream unicodeIn = null;
    try {
        unicodeIn = new UnicodeInputStream(new FileInputStream(file), null);
        final FastCharArrayWriter content = new FastCharArrayWriter(capacity);

        String charsetName = unicodeIn.getDetectedEncoding();
        if (charsetName == null) {
            // Nothing detected: fall back to UTF-8.
            charsetName = StringPool.UTF_8;
        }

        StreamUtil.copy(unicodeIn, content, charsetName);
        return content.toString();
    } finally {
        StreamUtil.close(unicodeIn);
    }
}
/**
 * Reads a UTF string from an input stream. The stream is wrapped in a
 * {@link UnicodeInputStream}; the encoding it reports is used for decoding,
 * and {@code StringPool.UTF_8} is used when it reports none.
 * The wrapping stream is passed to {@code StreamUtil.close} before returning.
 *
 * @param inputStream stream to read
 * @return decoded stream content
 * @throws IOException if an I/O error propagates from reading
 */
public static String readUTFString(InputStream inputStream) throws IOException {
    UnicodeInputStream unicodeIn = null;
    try {
        unicodeIn = new UnicodeInputStream(inputStream, null);
        final FastCharArrayWriter content = new FastCharArrayWriter();

        final String detected = unicodeIn.getDetectedEncoding();
        final String charsetName = (detected != null) ? detected : StringPool.UTF_8;

        StreamUtil.copy(unicodeIn, content, charsetName);
        return content.toString();
    } finally {
        StreamUtil.close(unicodeIn);
    }
}
/**
 * Reads a UTF {@link String} from an {@link InputStream}.
 * The stream is wrapped in a {@link UnicodeInputStream} and decoded with the
 * encoding returned by {@code detectEncoding} for that wrapper (documented
 * contract: UTF-8 is assumed when no BOM is present; that fallback lives in
 * {@code detectEncoding}, not in this method).
 * The wrapping stream is passed to {@code StreamUtil.close} before returning.
 *
 * @param inputStream {@link InputStream} to read.
 * @return decoded content of the stream.
 * @throws IOException if an I/O error propagates from reading or copying the stream.
 * @see StreamUtil#copy(InputStream, String)
 */
public static String readUTFString(final InputStream inputStream) throws IOException {
    UnicodeInputStream unicodeIn = null;
    try {
        unicodeIn = new UnicodeInputStream(inputStream, null);
        final String charsetName = detectEncoding(unicodeIn);
        return StreamUtil.copy(unicodeIn, charsetName).toString();
    } finally {
        StreamUtil.close(unicodeIn);
    }
}
/**
 * A stream whose four bytes are written from {@code 0xEFBBBF65} must be
 * reported as UTF-8 with a BOM size of 3.
 */
@Test
void testUtf8() throws IOException {
    final int marker = 0xEFBBBF65;
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, marker);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(3, stream.getBOMSize());
    assertEquals("UTF-8", stream.getDetectedEncoding());
}
/**
 * A stream whose four bytes are written from {@code 0xFFFE6565} must be
 * reported as UTF-16LE with a BOM size of 2.
 */
@Test
void testUtf16LE() throws IOException {
    final int marker = 0xFFFE6565;
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, marker);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(2, stream.getBOMSize());
    assertEquals("UTF-16LE", stream.getDetectedEncoding());
}
/**
 * A stream whose four bytes are written from {@code 0x0000FEFF} must be
 * reported as UTF-32BE with a BOM size of 4.
 */
@Test
void testUtf32BE() throws IOException {
    final int marker = 0x0000FEFF;
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, marker);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(4, stream.getBOMSize());
    assertEquals("UTF-32BE", stream.getDetectedEncoding());
}
/**
 * A stream whose four bytes are written from {@code 0x11223344} must report
 * a BOM size of 0 and no detected encoding.
 */
@Test
void testNoUtf() throws IOException {
    final int marker = 0x11223344;
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, marker);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(0, stream.getBOMSize());
    assertNull(stream.getDetectedEncoding());
}
}
/**
 * A stream whose four bytes are written from {@code 0xFEFF6565} must be
 * reported as UTF-16BE with a BOM size of 2.
 */
@Test
void testUtf16BE() throws IOException {
    final int marker = 0xFEFF6565;
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, marker);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(2, stream.getBOMSize());
    assertEquals("UTF-16BE", stream.getDetectedEncoding());
}
/**
 * A stream whose four bytes are written from {@code 0xFFFE0000} must be
 * reported as UTF-32LE with a BOM size of 4.
 */
@Test
void testUtf32LE() throws IOException {
    final int marker = 0xFFFE0000;
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, marker);

    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(4, stream.getBOMSize());
    assertEquals("UTF-32LE", stream.getDetectedEncoding());
}
/**
 * Factory helper that wraps an {@link InputStream} in a new {@link UnicodeInputStream}.
 *
 * @param input stream to wrap
 * @param targetEncoding encoding handed to the {@link UnicodeInputStream} constructor
 * @return the newly created {@link UnicodeInputStream}
 */
private static UnicodeInputStream unicodeInputStreamOf(final InputStream input, final String targetEncoding) {
    final UnicodeInputStream wrapped = new UnicodeInputStream(input, targetEncoding);
    return wrapped;
}
Usage pattern:
    String enc = "ISO-8859-1"; // or null to use the system default
    FileInputStream fis = new FileInputStream(file);
    UnicodeInputStream uin = new UnicodeInputStream(fis, enc);
    enc = uin.getEncoding(); // check and skip possible BOM bytes
    InputStreamReader in;
    if (enc == null) in = new InputStreamReader(uin);
    else in = new InputStreamReader(uin, enc);
/**
 * Reads a UTF {@link String} from an {@link InputStream}.
 * The stream is wrapped in a {@link UnicodeInputStream} and decoded with the
 * encoding returned by {@code detectEncoding} for that wrapper (documented
 * contract: UTF-8 is assumed when no BOM is present; that fallback lives in
 * {@code detectEncoding}, not in this method).
 * The wrapping stream is passed to {@code StreamUtil.close} before returning.
 *
 * @param inputStream {@link InputStream} to read.
 * @return decoded content of the stream.
 * @throws IOException if an I/O error propagates from reading or copying the stream.
 * @see StreamUtil#copy(InputStream, String)
 */
public static String readUTFString(final InputStream inputStream) throws IOException {
    UnicodeInputStream unicodeIn = null;
    try {
        unicodeIn = new UnicodeInputStream(inputStream, null);
        final String charsetName = detectEncoding(unicodeIn);
        return StreamUtil.copy(unicodeIn, charsetName).toString();
    } finally {
        StreamUtil.close(unicodeIn);
    }
}