// NOTE(review): the trailing @Override at the end of the code line below annotates
// whatever declaration follows this method; it is not part of mark() itself.
/**
 * Forwards the mark request to {@code payloadResource}.
 *
 * @param readlimit passed through unchanged to {@code payloadResource.mark(int)}
 */
@Override public void mark(int readlimit) { payloadResource.mark(readlimit); } @Override
// NOTE(review): the trailing @Override at the end of the code line below annotates
// whatever declaration follows this method; it is not part of mark() itself.
/**
 * Forwards the mark request to {@code payloadResource}.
 *
 * @param readlimit passed through unchanged to {@code payloadResource.mark(int)}
 */
@Override public void mark(int readlimit) { payloadResource.mark(readlimit); } @Override
/**
 * Detects a charset from a byte-order mark at the current read position of
 * {@code resource}. The position is marked before reading and reset afterwards.
 *
 * @param resource resource to inspect
 * @return {@code "UTF-16BE"} for {@code FE FF}, {@code "UTF-16LE"} for {@code FF FE},
 *         {@code "UTF-8"} for {@code EF BB BF}; {@code null} if none of these
 *         matches or an {@link IOException} occurs while reading or resetting
 */
@Override
public String sniff(Resource resource) {
    byte[] bbuffer = new byte[MAX_BOM_LEN];
    resource.mark(MAX_BOM_LEN);
    try {
        // A single read() may hand back fewer bytes than requested, so keep
        // reading until the buffer is full or the resource has no more data.
        int filled = 0;
        while (filled < MAX_BOM_LEN) {
            int n = resource.read(bbuffer, filled, MAX_BOM_LEN - filled);
            if (n <= 0) {
                break;
            }
            filled += n;
        }
        resource.reset();
    } catch (IOException ex) {
        return null;
    }
    // Bytes that were never read stay zero and therefore cannot match a BOM.
    if (bbuffer[0] == (byte) 0xFE && bbuffer[1] == (byte) 0xFF) {
        return "UTF-16BE";
    }
    if (bbuffer[0] == (byte) 0xFF && bbuffer[1] == (byte) 0xFE) {
        return "UTF-16LE";
    }
    if (bbuffer[0] == (byte) 0xEF && bbuffer[1] == (byte) 0xBB && bbuffer[2] == (byte) 0xBF) {
        return "UTF-8";
    }
    return null;
}
} // closes the enclosing type, whose header is outside this excerpt
/**
 * Detects a charset from a byte-order mark at the current read position of
 * {@code resource}. The position is marked before reading and reset afterwards.
 *
 * @param resource resource to inspect
 * @return {@code "UTF-16BE"} for {@code FE FF}, {@code "UTF-16LE"} for {@code FF FE},
 *         {@code "UTF-8"} for {@code EF BB BF}; {@code null} if none of these
 *         matches or an {@link IOException} occurs while reading or resetting
 */
@Override
public String sniff(Resource resource) {
    byte[] bbuffer = new byte[MAX_BOM_LEN];
    resource.mark(MAX_BOM_LEN);
    try {
        // A single read() may hand back fewer bytes than requested, so keep
        // reading until the buffer is full or the resource has no more data.
        int filled = 0;
        while (filled < MAX_BOM_LEN) {
            int n = resource.read(bbuffer, filled, MAX_BOM_LEN - filled);
            if (n <= 0) {
                break;
            }
            filled += n;
        }
        resource.reset();
    } catch (IOException ex) {
        return null;
    }
    // Bytes that were never read stay zero and therefore cannot match a BOM.
    if (bbuffer[0] == (byte) 0xFE && bbuffer[1] == (byte) 0xFF) {
        return "UTF-16BE";
    }
    if (bbuffer[0] == (byte) 0xFF && bbuffer[1] == (byte) 0xFE) {
        return "UTF-16LE";
    }
    if (bbuffer[0] == (byte) 0xEF && bbuffer[1] == (byte) 0xBB && bbuffer[2] == (byte) 0xBF) {
        return "UTF-8";
    }
    return null;
}
} // closes the enclosing type, whose header is outside this excerpt
@Override public String sniff(Resource resource) { String charsetName = null; try { byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; resource.mark(MAX_CHARSET_READAHEAD); resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); // convert to UTF-8 String -- which hopefully will not mess up the // characters we're interested in... StringBuilder sb = new StringBuilder(new String(bbuffer, "UTF-8")); String metaContentType = TagMagix.getTagAttrWhere(sb, "META", "content", "http-equiv", "Content-Type"); if (metaContentType != null) { charsetName = contentTypeToCharset(metaContentType); // override if (charsetName != null) { String upped = charsetName.toUpperCase(); if (upped.startsWith("UTF-16")) charsetName = "UTF-8"; } } return charsetName; } catch (IOException ex) { // TODO: log at FINE. return null; } } }
@Override public String sniff(Resource resource) { String charsetName = null; try { byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; resource.mark(MAX_CHARSET_READAHEAD); resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); // convert to UTF-8 String -- which hopefully will not mess up the // characters we're interested in... StringBuilder sb = new StringBuilder(new String(bbuffer, "UTF-8")); String metaContentType = TagMagix.getTagAttrWhere(sb, "META", "content", "http-equiv", "Content-Type"); if (metaContentType != null) { charsetName = contentTypeToCharset(metaContentType); // override if (charsetName != null) { String upped = charsetName.toUpperCase(); if (upped.startsWith("UTF-16")) charsetName = "UTF-8"; } } return charsetName; } catch (IOException ex) { // TODO: log at FINE. return null; } } }
/** * Read first {@code sniffLength} bytes of {@code resource}'s payload, * decoding {@code Content-Encoding} if any. Reset {@code resource}'s * read position back to zero. * @param resource Resource to load bytes from * @return bytes, zero-padded if payload is shorter. * @throws IOException */ protected byte[] peekContent(Resource resource) throws IOException { byte[] bbuffer = new byte[Math.max(sniffLength, MINIMUM_SNIFF_BUFFER_SIZE)]; String encoding = resource.getHeader("content-encoding"); if ("gzip".equalsIgnoreCase(encoding) || "x-gzip".equalsIgnoreCase(encoding)) { // use larger readlimit, because gzip-ed data can be larger than the original // at low compression level. resource.mark(sniffLength + 100); @SuppressWarnings("resource") Resource z = new GzipDecodingResource(resource); z.read(bbuffer, 0, sniffLength); resource.reset(); } else { resource.mark(sniffLength); resource.read(bbuffer, 0, sniffLength); resource.reset(); } return bbuffer; }
/** * Read first {@code sniffLength} bytes of {@code resource}'s payload, * decoding {@code Content-Encoding} if any. Reset {@code resource}'s * read position back to zero. * @param resource Resource to load bytes from * @return bytes, zero-padded if payload is shorter. * @throws IOException */ protected byte[] peekContent(Resource resource) throws IOException { byte[] bbuffer = new byte[Math.max(sniffLength, MINIMUM_SNIFF_BUFFER_SIZE)]; String encoding = resource.getHeader("content-encoding"); if ("gzip".equalsIgnoreCase(encoding) || "x-gzip".equalsIgnoreCase(encoding)) { // use larger readlimit, because gzip-ed data can be larger than the original // at low compression level. resource.mark(sniffLength + 100); @SuppressWarnings("resource") Resource z = new GzipDecodingResource(resource); z.read(bbuffer, 0, sniffLength); resource.reset(); } else { resource.mark(sniffLength); resource.read(bbuffer, 0, sniffLength); resource.reset(); } return bbuffer; }
@Override public String sniff(Resource resource) { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; // (1) UniversalDetector detector = new UniversalDetector(null); // (2) resource.mark(MAX_CHARSET_READAHEAD); try { int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); detector.handleData(bbuffer, 0, len); } catch (IOException ex) { // } // (3) detector.dataEnd(); // (4) charsetName = detector.getDetectedCharset(); // (5) detector.reset(); if (isCharsetSupported(charsetName)) { return charsetName; } return null; } }
// Peek at decodedResource: mark the current position, read into buffer, then reset
// so later readers start from the same position. amtRead receives read()'s return value.
// NOTE(review): presumably buffer holds at most FRAMESET_SCAN_BUFFER_SIZE bytes, so the
// read stays within the mark limit -- confirm where buffer is declared.
decodedResource.mark(FRAMESET_SCAN_BUFFER_SIZE); int amtRead = decodedResource.read(buffer); decodedResource.reset();
@Override public String sniff(Resource resource) { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; // (1) UniversalDetector detector = new UniversalDetector(null); // (2) resource.mark(MAX_CHARSET_READAHEAD); try { int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); detector.handleData(bbuffer, 0, len); } catch (IOException ex) { // } // (3) detector.dataEnd(); // (4) charsetName = detector.getDetectedCharset(); // (5) detector.reset(); if (isCharsetSupported(charsetName)) { return charsetName; } return null; } }
// Peek at decodedResource: mark the current position, read into buffer, then reset
// so later readers start from the same position. amtRead receives read()'s return value.
// NOTE(review): presumably buffer holds at most FRAMESET_SCAN_BUFFER_SIZE bytes, so the
// read stays within the mark limit -- confirm where buffer is declared.
decodedResource.mark(FRAMESET_SCAN_BUFFER_SIZE); int amtRead = decodedResource.read(buffer); decodedResource.reset();