BOMInputStream bomIn = new BOMInputStream(in); int firstNonBOMByte = bomIn.read(); // Skips BOM if (bomIn.hasBOM()) { // has a UTF-8 BOM }
/**
 * Opens {@code file} as a character stream, probing for a UTF-8, UTF-16BE or
 * UTF-16LE byte order mark first.
 *
 * <p>When a BOM is found, the {@code encoding} field is overwritten with the
 * charset name reported for that BOM; otherwise the current value of
 * {@code encoding} is used unchanged.
 *
 * @return a reader over the file content, decoded with {@code encoding}
 * @throws Exception if the file cannot be opened, the BOM probe fails, or
 *         {@code encoding} is not accepted by {@link InputStreamReader}
 */
@Override
public Reader getReader() throws Exception {
    BOMInputStream inputStream = new BOMInputStream(Files.newInputStream(file.toPath()),
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
    try {
        if (inputStream.hasBOM()) {
            encoding = inputStream.getBOMCharsetName();
        }
        return new InputStreamReader(inputStream, encoding);
    } catch (Exception e) {
        // The caller never receives the stream on this path, so release the
        // file handle here instead of leaking it.
        try {
            inputStream.close();
        } catch (Exception closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/**
 * Checks that a stream built from an empty byte array reports end-of-stream on
 * the first read and exposes no byte order mark.
 */
@Test
public void testReadEmpty() throws Exception {
    final byte[] data = new byte[] {};
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false))) {
        assertEquals(-1, in.read());
        assertFalse("hasBOM()", in.hasBOM());
        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertNull("getBOM", in.getBOM());
    }
}
@Test public void testReadWithBOMUtf8() throws Exception { final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8); final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), ByteOrderMark.UTF_8); assertEquals('A', in.read()); assertEquals('B', in.read()); assertEquals('C', in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8)); assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_16BE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-16BE } in.close(); }
/**
 * Checks that a stream configured for both UTF-16BE and UTF-8 BOMs yields only
 * the payload bytes and identifies the BOM as UTF-8, not UTF-16BE.
 */
@Test
public void testReadWithMultipleBOM() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true),
            ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8)) {
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
    }
}
/**
 * Checks that a two-byte payload is read back unchanged and that no byte order
 * mark is reported for it.
 */
@Test
public void testReadSmall() throws Exception {
    final byte[] data = new byte[] { 'A', 'B' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false))) {
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals(-1, in.read());
        assertFalse("hasBOM()", in.hasBOM());
        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertNull("getBOM", in.getBOM());
    }
}
/**
 * Checks that a three-byte payload is read back unchanged and that no byte
 * order mark is reported for it.
 */
@Test
public void testReadWithoutBOM() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false))) {
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
        assertFalse("hasBOM()", in.hasBOM());
        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertNull("getBOM", in.getBOM());
    }
}
/**
 * Checks that querying the BOM before any read still leaves the full payload
 * available: the UTF-8 BOM is reported first, then 'A', 'B', 'C' are read.
 */
@Test
public void testGetBOMFirstThenRead() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true))) {
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
    }
}
@Test public void testReadWithBOMUtf16Le() throws Exception { final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE); final BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(0, in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-16LE)", in.hasBOM(ByteOrderMark.UTF_16LE)); assertEquals("getBOM", ByteOrderMark.UTF_16LE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_16BE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-16BE } in.close(); }
@Test public void testReadWithBOMUtf16Be() throws Exception { final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE); final BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE); assertEquals(0, in.read()); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE)); assertEquals("getBOM", ByteOrderMark.UTF_16BE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_16LE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-16LE } in.close(); }
/**
 * Checks that a stream constructed with the boolean flag set to {@code true}
 * returns the three BOM bytes (EF BB BF) ahead of the payload and still
 * reports the UTF-8 BOM.
 */
@Test
public void testReadWithBOMInclude() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true)) {
        assertEquals(0xEF, in.read());
        assertEquals(0xBB, in.read());
        assertEquals(0xBF, in.read());
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
    }
}
/**
 * Checks that, with the boolean flag set to {@code true}, querying the BOM
 * before any read does not consume it: the BOM bytes (EF BB BF) are still
 * returned ahead of the payload.
 */
@Test
public void testGetBOMFirstThenReadInclude() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true)) {
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
        assertEquals(0xEF, in.read());
        assertEquals(0xBB, in.read());
        assertEquals(0xBF, in.read());
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
    }
}
@Test public void testReadWithBOMUtf32Be() throws Exception { Assume.assumeTrue(Charset.isSupported("UTF_32BE")); final byte[] data = "ABC".getBytes("UTF_32BE"); final BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-32BE)", in.hasBOM(ByteOrderMark.UTF_32BE)); assertEquals("getBOM", ByteOrderMark.UTF_32BE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_32LE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-32LE } in.close(); }
@Test public void testReadWithBOMUtf32Le() throws Exception { Assume.assumeTrue(Charset.isSupported("UTF_32LE")); final byte[] data = "ABC".getBytes("UTF_32LE"); final BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-32LE)", in.hasBOM(ByteOrderMark.UTF_32LE)); assertEquals("getBOM", ByteOrderMark.UTF_32LE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_32BE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-32BE } in.close(); }
// Usage sketch: wrap the raw stream and ask whether it starts with a byte
// order mark before reading any payload.
BOMInputStream bomIn = new BOMInputStream(in);
if (bomIn.hasBOM()) {
    // has a UTF-8 BOM
}
/**
 * Opens {@code file} as a character stream, probing for a UTF-8, UTF-16BE or
 * UTF-16LE byte order mark first.
 *
 * <p>When a BOM is found, the {@code encoding} field is overwritten with the
 * charset name reported for that BOM; otherwise the current value of
 * {@code encoding} is used unchanged.
 *
 * @return a reader over the file content, decoded with {@code encoding}
 * @throws Exception if the file cannot be opened, the BOM probe fails, or
 *         {@code encoding} is not accepted by {@link InputStreamReader}
 */
@Override
public Reader getReader() throws Exception {
    BOMInputStream inputStream = new BOMInputStream(Files.newInputStream(file.toPath()),
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
    try {
        if (inputStream.hasBOM()) {
            encoding = inputStream.getBOMCharsetName();
        }
        return new InputStreamReader(inputStream, encoding);
    } catch (Exception e) {
        // The caller never receives the stream on this path, so release the
        // file handle here instead of leaking it.
        try {
            inputStream.close();
        } catch (Exception closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/**
 * Reads the whole content of {@code resource} into a string.
 *
 * <p>If the stream reports a byte order mark, the content is decoded with the
 * charset named by that BOM; otherwise it is decoded with {@code charset}.
 * The stream is always closed before returning.
 *
 * @param resource the resource whose input stream is read
 * @param charset  the charset used when no BOM is detected
 * @return the decoded content of the resource
 * @throws IOException if the stream cannot be opened, probed or read
 */
private String loadResource(Resource resource, Charset charset) throws IOException {
    // NOTE(review): the single-argument constructor is used, so only the
    // library's default BOM set is probed - confirm that this is sufficient.
    try (BOMInputStream inputStream = new BOMInputStream(resource.getInputStream())) {
        if (inputStream.hasBOM()) {
            final String bomCharsetName = inputStream.getBOMCharsetName();
            // Concatenate like the other branch does; the former "%s"
            // placeholder relied on printf-style expansion by the logger.
            logger.debug("BOM found " + bomCharsetName);
            return IOUtils.toString(inputStream, bomCharsetName);
        }
        logger.debug("Using charset " + charset.name());
        return IOUtils.toString(inputStream, charset.name());
    }
}
/**
 * Returns the parent's input stream wrapped so that a leading UTF-8,
 * UTF-16LE/BE or UTF-32LE/BE byte order mark is excluded from the data.
 *
 * <p>When a BOM is detected, a notice naming it is printed to
 * {@code System.err}. If probing for the BOM fails with an
 * {@link IOException}, the result of a second {@code super.getInputStream()}
 * call is returned instead of the wrapper.
 *
 * @return the BOM-excluding wrapper, or the parent's stream if probing failed
 */
@Override
public InputStream getInputStream() {
    final BOMInputStream bomFiltered = new BOMInputStream(
            super.getInputStream(),
            false,
            ByteOrderMark.UTF_8,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE,
            ByteOrderMark.UTF_32BE);
    InputStream result;
    try {
        if (bomFiltered.hasBOM()) {
            System.err.println("removing BOM " + bomFiltered.getBOM());
        }
        result = bomFiltered;
    } catch (IOException e) {
        // NOTE(review): the probe failure is dropped silently and the wrapper
        // is abandoned without being closed - confirm this is intended.
        result = super.getInputStream();
    }
    return result;
}
};
/**
 * Returns the parent's input stream wrapped so that a leading UTF-8,
 * UTF-16LE/BE or UTF-32LE/BE byte order mark is excluded from the data.
 *
 * <p>When a BOM is detected, a notice naming it is printed to
 * {@code System.err}. If probing for the BOM fails with an
 * {@link IOException}, the result of a second {@code super.getInputStream()}
 * call is returned instead of the wrapper.
 *
 * @return the BOM-excluding wrapper, or the parent's stream if probing failed
 */
@Override
public InputStream getInputStream() {
    final BOMInputStream bomFiltered = new BOMInputStream(
            super.getInputStream(),
            false,
            ByteOrderMark.UTF_8,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE,
            ByteOrderMark.UTF_32BE);
    InputStream result;
    try {
        if (bomFiltered.hasBOM()) {
            System.err.println("removing BOM " + bomFiltered.getBOM());
        }
        result = bomFiltered;
    } catch (IOException e) {
        // NOTE(review): the probe failure is dropped silently and the wrapper
        // is abandoned without being closed - confirm this is intended.
        result = super.getInputStream();
    }
    return result;
}
};
/**
 * Reads the whole content of {@code resource} into a string.
 *
 * <p>If the stream reports a byte order mark, the content is decoded with the
 * charset named by that BOM; otherwise it is decoded with {@code charset}.
 * Either way the decision is logged at debug level together with the
 * resource name, and the stream is closed before returning.
 *
 * @param resource the resource whose input stream is read
 * @param charset  the charset used when no BOM is detected
 * @return the decoded content of the resource
 * @throws IOException if the stream cannot be opened, probed, read or closed
 */
private String loadResource(Resource resource, Charset charset) throws IOException {
    final BOMInputStream bomAware = new BOMInputStream(resource.getInputStream());
    try {
        // Guard clause: no BOM means the caller-supplied charset decides.
        if (!bomAware.hasBOM()) {
            logger.debug("Using charset " + resource.getName() + ":" + charset.name());
            return IOUtils.toString(bomAware, charset.name());
        }
        logger.debug("BOM found " + resource.getName() + ":" + bomAware.getBOMCharsetName());
        return IOUtils.toString(bomAware, bomAware.getBOMCharsetName());
    } finally {
        bomAware.close();
    }
}