/**
 * Creates a {@link UnicodeInputStream} wrapping the given {@link InputStream}.
 *
 * @param input {@link InputStream} handed to the {@link UnicodeInputStream} constructor.
 * @param targetEncoding encoding name handed to the {@link UnicodeInputStream} constructor.
 * @return the newly created {@link UnicodeInputStream}.
 */
private static UnicodeInputStream unicodeInputStreamOf(final InputStream input, final String targetEncoding) {
    final UnicodeInputStream wrapped = new UnicodeInputStream(input, targetEncoding);
    return wrapped;
}
/**
 * Resolves the encoding to use for a {@link UnicodeInputStream}.
 * <p>
 * Asks the stream via {@link UnicodeInputStream#getDetectedEncoding()} and
 * substitutes {@link StringPool#UTF_8} when that call returns {@code null}.
 *
 * @param in {@link UnicodeInputStream} to query.
 * @return the detected encoding name, or {@link StringPool#UTF_8} if none was detected.
 * @see UnicodeInputStream#getDetectedEncoding()
 */
private static String detectEncoding(final UnicodeInputStream in) {
    final String detected = in.getDetectedEncoding();
    return (detected != null) ? detected : StringPool.UTF_8;
}
/**
 * Reads a single byte from the stream.
 * <p>
 * {@code init()} is invoked first; the read itself is delegated to the
 * internal input stream.
 *
 * @return the value returned by the internal stream's {@code read()}.
 * @throws IOException propagated from {@code init()} or from the internal stream.
 */
@Override
public int read() throws IOException {
    init();
    final int nextByte = internalInputStream.read();
    return nextByte;
}
/**
 * Fills a 4-byte buffer from {@code 0xEFBBBF65} and expects the stream to report
 * a BOM size of 3 and the detected encoding {@code "UTF-8"} after {@code init()}.
 */
@Test
void testUtf8() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xEFBBBF65);

    // No target encoding is supplied (null).
    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(3, stream.getBOMSize());
    assertEquals("UTF-8", stream.getDetectedEncoding());
}
/**
 * Reads the content of a UTF file into a char array.
 * <p>
 * The file is read through a {@link UnicodeInputStream} created without a target
 * encoding. The encoding reported by {@code getDetectedEncoding()} is used for
 * decoding; when it is {@code null}, {@link StringPool#UTF_8} is used instead.
 *
 * @param file file to read.
 * @return file content as characters.
 * @throws FileNotFoundException if {@code file} does not exist.
 * @throws IOException if {@code file} is not a regular file or reading fails.
 * @see UnicodeInputStream
 */
public static char[] readUTFChars(File file) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Writer size argument: file length in bytes, capped at Integer.MAX_VALUE.
    final int size = (int) Math.min(file.length(), Integer.MAX_VALUE);

    UnicodeInputStream unicodeStream = null;
    try {
        unicodeStream = new UnicodeInputStream(new FileInputStream(file), null);
        final FastCharArrayWriter writer = new FastCharArrayWriter(size);

        final String detected = unicodeStream.getDetectedEncoding();
        final String charsetName = (detected == null) ? StringPool.UTF_8 : detected;

        StreamUtil.copy(unicodeStream, writer, charsetName);
        return writer.toCharArray();
    } finally {
        StreamUtil.close(unicodeStream);
    }
}
Usage pattern:
    String enc = "ISO-8859-1"; // or null to use the system default
    FileInputStream fis = new FileInputStream(file);
    UnicodeInputStream uin = new UnicodeInputStream(fis, enc);
    enc = uin.getEncoding(); // check and skip possible BOM bytes
    InputStreamReader in;
    if (enc == null) in = new InputStreamReader(uin);
    else in = new InputStreamReader(uin, enc);
/**
 * Fills a 4-byte buffer from {@code 0xFFFE6565} and expects the stream to report
 * a BOM size of 2 and the detected encoding {@code "UTF-16LE"} after {@code init()}.
 */
@Test
void testUtf16LE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xFFFE6565);

    // No target encoding is supplied (null).
    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(2, stream.getBOMSize());
    assertEquals("UTF-16LE", stream.getDetectedEncoding());
}
/**
 * Reads a UTF file into a string, using the encoding detected from an optional BOM.
 * <p>
 * The file is read through a {@link UnicodeInputStream} created without a target
 * encoding. When {@code getDetectedEncoding()} returns {@code null},
 * {@link StringPool#UTF_8} is assumed.
 *
 * @param file file to read.
 * @return file content as a string.
 * @throws FileNotFoundException if {@code file} does not exist.
 * @throws IOException if {@code file} is not a regular file or reading fails.
 * @see UnicodeInputStream
 */
public static String readUTFString(File file) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Writer size argument: file length in bytes, capped at Integer.MAX_VALUE.
    final int size = (int) Math.min(file.length(), Integer.MAX_VALUE);

    UnicodeInputStream unicodeStream = null;
    try {
        unicodeStream = new UnicodeInputStream(new FileInputStream(file), null);
        final FastCharArrayWriter writer = new FastCharArrayWriter(size);

        final String detected = unicodeStream.getDetectedEncoding();
        final String charsetName = (detected == null) ? StringPool.UTF_8 : detected;

        StreamUtil.copy(unicodeStream, writer, charsetName);
        return writer.toString();
    } finally {
        StreamUtil.close(unicodeStream);
    }
}
/**
 * Fills a 4-byte buffer from {@code 0x0000FEFF} and expects the stream to report
 * a BOM size of 4 and the detected encoding {@code "UTF-32BE"} after {@code init()}.
 */
@Test
void testUtf32BE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0x0000FEFF);

    // No target encoding is supplied (null).
    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(4, stream.getBOMSize());
    assertEquals("UTF-32BE", stream.getDetectedEncoding());
}
/**
 * Reads a UTF string from an input stream, using the encoding detected from an optional BOM.
 * <p>
 * The input is wrapped in a {@link UnicodeInputStream} created without a target
 * encoding. When {@code getDetectedEncoding()} returns {@code null},
 * {@link StringPool#UTF_8} is assumed. The wrapping stream is always passed to
 * {@code StreamUtil.close} before this method returns.
 *
 * @param inputStream stream to read from.
 * @return stream content as a string.
 * @throws IOException if reading fails.
 */
public static String readUTFString(InputStream inputStream) throws IOException {
    UnicodeInputStream unicodeStream = null;
    try {
        unicodeStream = new UnicodeInputStream(inputStream, null);
        final FastCharArrayWriter writer = new FastCharArrayWriter();

        final String detected = unicodeStream.getDetectedEncoding();
        final String charsetName = (detected == null) ? StringPool.UTF_8 : detected;

        StreamUtil.copy(unicodeStream, writer, charsetName);
        return writer.toString();
    } finally {
        StreamUtil.close(unicodeStream);
    }
}
/**
 * Reads all lines of a file, decoded with the given encoding.
 * <p>
 * When the encoding name starts with {@code "UTF"}, the file stream is first
 * wrapped in a {@link UnicodeInputStream} constructed with that encoding.
 *
 * @param file file to read.
 * @param encoding charset name used for decoding; must not be {@code null}.
 * @return the lines of the file, in reading order.
 * @throws FileNotFoundException if {@code file} does not exist.
 * @throws IOException if {@code file} is not a regular file or reading fails.
 */
public static String[] readLines(File file, String encoding) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    final List<String> lines = new ArrayList<>();
    InputStream source = null;
    try {
        source = new FileInputStream(file);
        if (encoding.startsWith("UTF")) {
            source = new UnicodeInputStream(source, encoding);
        }
        final BufferedReader reader = new BufferedReader(new InputStreamReader(source, encoding));
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            lines.add(line);
        }
    } finally {
        // Closing the underlying stream; the reader wrappers are not closed separately.
        StreamUtil.close(source);
    }
    return lines.toArray(new String[0]);
}
/**
 * Returns the detected UTF encoding, or <code>null</code> if no UTF encoding
 * has been detected (i.e. no BOM).
 * <p>
 * If the stream has not been initialized yet, {@link #init()} is called first.
 *
 * @return detected encoding name or <code>null</code>.
 * @throws IllegalStateException wrapping the {@link IOException} thrown by {@link #init()}.
 */
public String getDetectedEncoding() {
    if (initialized) {
        return encoding;
    }
    try {
        init();
    } catch (IOException ioex) {
        throw new IllegalStateException(ioex);
    }
    return encoding;
}
/**
 * Resolves the encoding to use for a {@link UnicodeInputStream}.
 * <p>
 * Asks the stream via {@link UnicodeInputStream#getDetectedEncoding()} and
 * substitutes {@link StringPool#UTF_8} when that call returns {@code null}.
 *
 * @param in {@link UnicodeInputStream} to query.
 * @return the detected encoding name, or {@link StringPool#UTF_8} if none was detected.
 * @see UnicodeInputStream#getDetectedEncoding()
 */
private static String detectEncoding(final UnicodeInputStream in) {
    final String detected = in.getDetectedEncoding();
    return (detected != null) ? detected : StringPool.UTF_8;
}
/**
 * Fills a 4-byte buffer from {@code 0x11223344} and expects the stream to report
 * a BOM size of 0 and a {@code null} detected encoding after {@code init()}.
 * The stream is created without a target encoding ({@code null}).
 */
@Test void testNoUtf() throws IOException { byte[] bytes = new byte[4]; Bits.putInt(bytes, 0, 0x11223344); ByteArrayInputStream basis = new ByteArrayInputStream(bytes); UnicodeInputStream uis = new UnicodeInputStream(basis, null); uis.init(); assertEquals(0, uis.getBOMSize()); assertNull(uis.getDetectedEncoding()); } }
/**
 * Reads the content of a file as a string decoded with the given encoding.
 * <p>
 * When the encoding name starts with {@code "UTF"}, the file stream is first
 * wrapped in a {@link UnicodeInputStream} constructed with that encoding
 * (intended to handle an optional BOM).
 *
 * @param file file to read.
 * @param encoding charset name used for decoding; must not be {@code null}.
 * @return file content as a string.
 * @throws FileNotFoundException if {@code file} does not exist.
 * @throws IOException if {@code file} is not a regular file or reading fails.
 */
public static String readString(File file, String encoding) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Writer size argument: file length in bytes, capped at Integer.MAX_VALUE.
    final int size = (int) Math.min(file.length(), Integer.MAX_VALUE);

    InputStream source = null;
    try {
        source = new FileInputStream(file);
        if (encoding.startsWith("UTF")) {
            source = new UnicodeInputStream(source, encoding);
        }
        final FastCharArrayWriter writer = new FastCharArrayWriter(size);
        StreamUtil.copy(source, writer, encoding);
        return writer.toString();
    } finally {
        StreamUtil.close(source);
    }
}
/**
 * Reads a single byte from the stream.
 * <p>
 * {@code init()} is invoked first; the read itself is delegated to the
 * internal input stream.
 *
 * @return the value returned by the internal stream's {@code read()}.
 * @throws IOException propagated from {@code init()} or from the internal stream.
 */
@Override
public int read() throws IOException {
    init();
    final int nextByte = internalInputStream.read();
    return nextByte;
}
/**
 * Fills a 4-byte buffer from {@code 0xFEFF6565} and expects the stream to report
 * a BOM size of 2 and the detected encoding {@code "UTF-16BE"} after {@code init()}.
 */
@Test
void testUtf16BE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xFEFF6565);

    // No target encoding is supplied (null).
    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(2, stream.getBOMSize());
    assertEquals("UTF-16BE", stream.getDetectedEncoding());
}
/**
 * Reads the content of a file as a char array decoded with the given encoding.
 * <p>
 * When the encoding name starts with {@code "UTF"}, the file stream is first
 * wrapped in a {@link UnicodeInputStream} constructed with that encoding.
 *
 * @param file file to read.
 * @param encoding charset name used for decoding; must not be {@code null}.
 * @return file content as characters.
 * @throws FileNotFoundException if {@code file} does not exist.
 * @throws IOException if {@code file} is not a regular file or reading fails.
 */
public static char[] readChars(File file, String encoding) throws IOException {
    if (!file.exists()) {
        throw new FileNotFoundException(MSG_NOT_FOUND + file);
    }
    if (!file.isFile()) {
        throw new IOException(MSG_NOT_A_FILE + file);
    }

    // Writer size argument: file length in bytes, capped at Integer.MAX_VALUE.
    final int size = (int) Math.min(file.length(), Integer.MAX_VALUE);

    InputStream source = null;
    try {
        source = new FileInputStream(file);
        if (encoding.startsWith("UTF")) {
            source = new UnicodeInputStream(source, encoding);
        }
        final FastCharArrayWriter writer = new FastCharArrayWriter(size);
        StreamUtil.copy(source, writer, encoding);
        return writer.toCharArray();
    } finally {
        StreamUtil.close(source);
    }
}
/**
 * Returns the detected UTF encoding, or {@code null} if no UTF encoding
 * has been detected (i.e. no BOM).
 * <p>
 * If the stream has not been initialized yet, {@link #init()} is called first.
 *
 * @return detected encoding name or {@code null}.
 * @throws IllegalStateException wrapping the {@link IOException} thrown by {@link #init()}.
 */
public String getDetectedEncoding() {
    if (initialized) {
        return encoding;
    }
    try {
        init();
    } catch (IOException ioex) {
        throw new IllegalStateException(ioex);
    }
    return encoding;
}
/**
 * Fills a 4-byte buffer from {@code 0xFFFE0000} and expects the stream to report
 * a BOM size of 4 and the detected encoding {@code "UTF-32LE"} after {@code init()}.
 */
@Test
void testUtf32LE() throws IOException {
    final byte[] data = new byte[4];
    Bits.putInt(data, 0, 0xFFFE0000);

    // No target encoding is supplied (null).
    final UnicodeInputStream stream = new UnicodeInputStream(new ByteArrayInputStream(data), null);
    stream.init();

    assertEquals(4, stream.getBOMSize());
    assertEquals("UTF-32LE", stream.getDetectedEncoding());
}