/**
 * Reads into {@code b} by delegating to {@code payloadResource.read(b)}.
 *
 * @param b destination buffer, passed through unchanged to {@code payloadResource}
 * @return the value returned by {@code payloadResource.read(b)}
 * @throws IOException if the delegated read throws it
 */
@Override public int read(byte[] b) throws IOException { return payloadResource.read(b); } @Override
// Usage sketch: acquire the resource's read lock, read, and release in a finally block so
// the release still runs when read() throws. The "..." stands for however r is obtained.
// NOTE(review): the lock is acquired via r.readLock().lock() but released via r.unlock(),
// not r.readLock().unlock() -- confirm that r.unlock() releases the same lock.
final Resource r = ...; r.readLock().lock(); try { r.read(); } finally { r.unlock(); }
/**
 * Reads by delegating to {@code payloadResource.read()}.
 *
 * @return the value returned by {@code payloadResource.read()}
 * @throws IOException if the delegated read throws it
 */
@Override public int read() throws IOException { return payloadResource.read(); } @Override
/**
 * Reads into {@code b} by delegating to {@code payloadResource.read(b, off, len)}.
 *
 * @param b destination buffer, passed through unchanged
 * @param off offset argument, passed through unchanged
 * @param len length argument, passed through unchanged
 * @return the value returned by {@code payloadResource.read(b, off, len)}
 * @throws IOException if the delegated read throws it
 */
@Override public int read(byte[] b, int off, int len) throws IOException { return payloadResource.read(b, off, len); } @Override
/**
 * Forwards a ranged read to the wrapped {@code payloadResource}.
 *
 * @param b destination buffer, handed to {@code payloadResource} as-is
 * @param off offset argument, handed to {@code payloadResource} as-is
 * @param len length argument, handed to {@code payloadResource} as-is
 * @return the result of {@code payloadResource.read(b, off, len)}
 * @throws IOException if {@code payloadResource.read(b, off, len)} throws it
 */
@Override public int read(byte[] b, int off, int len) throws IOException { return payloadResource.read(b, off, len); } @Override
/**
 * Forwards a no-argument read to the wrapped {@code payloadResource}.
 *
 * @return the result of {@code payloadResource.read()}
 * @throws IOException if {@code payloadResource.read()} throws it
 */
@Override public int read() throws IOException { return payloadResource.read(); } @Override
/**
 * Forwards a whole-buffer read to the wrapped {@code payloadResource}.
 *
 * @param b destination buffer, handed to {@code payloadResource} as-is
 * @return the result of {@code payloadResource.read(b)}
 * @throws IOException if {@code payloadResource.read(b)} throws it
 */
@Override public int read(byte[] b) throws IOException { return payloadResource.read(b); } @Override
/**
 * Determines a charset name from a byte-order mark at the current read position of
 * {@code resource}.
 * <p>
 * Marks the resource, reads up to {@code MAX_BOM_LEN} bytes into a fresh (zero-filled)
 * buffer and resets the resource, then compares the leading bytes against the UTF-16BE,
 * UTF-16LE and UTF-8 byte-order marks, in that order.
 *
 * @param resource resource to inspect
 * @return {@code "UTF-16BE"}, {@code "UTF-16LE"} or {@code "UTF-8"} when the matching BOM
 *         is found; {@code null} when none matches or the read/reset throws
 *         {@code IOException}
 */
@Override
public String sniff(Resource resource) {
    final byte[] head = new byte[MAX_BOM_LEN];
    resource.mark(MAX_BOM_LEN);
    try {
        resource.read(head, 0, MAX_BOM_LEN);
        resource.reset();
    } catch (IOException ex) {
        return null;
    }

    String detected = null;
    if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
        detected = "UTF-16BE";
    } else if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
        detected = "UTF-16LE";
    } else if (head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF) {
        detected = "UTF-8";
    }
    return detected;
}
// Unmatched brace kept from the original excerpt; presumably closes the enclosing class.
}
/**
 * Sniffs the charset of {@code resource} by looking for a byte-order mark.
 * <p>
 * The resource is marked, up to {@code MAX_BOM_LEN} bytes are read into a new buffer, and
 * the resource is reset. The buffer is then tested for the UTF-16BE ({@code FE FF}),
 * UTF-16LE ({@code FF FE}) and UTF-8 ({@code EF BB BF}) marks, in that order.
 *
 * @param resource resource whose leading bytes are examined
 * @return the charset name for the BOM found, or {@code null} if no BOM matches or an
 *         {@code IOException} occurs while reading or resetting
 */
@Override
public String sniff(Resource resource) {
    final byte[] bom = new byte[MAX_BOM_LEN];
    resource.mark(MAX_BOM_LEN);
    try {
        resource.read(bom, 0, MAX_BOM_LEN);
        resource.reset();
    } catch (IOException ex) {
        return null;
    }

    // Compare as unsigned values; elements are still accessed lazily, left to right.
    if ((bom[0] & 0xFF) == 0xFE && (bom[1] & 0xFF) == 0xFF) {
        return "UTF-16BE";
    }
    if ((bom[0] & 0xFF) == 0xFF && (bom[1] & 0xFF) == 0xFE) {
        return "UTF-16LE";
    }
    if ((bom[0] & 0xFF) == 0xEF && (bom[1] & 0xFF) == 0xBB && (bom[2] & 0xFF) == 0xBF) {
        return "UTF-8";
    }
    return null;
}
// Unmatched brace kept from the original excerpt; presumably closes the enclosing class.
}
@Override public String sniff(Resource resource) { String charsetName = null; try { byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; resource.mark(MAX_CHARSET_READAHEAD); resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); // convert to UTF-8 String -- which hopefully will not mess up the // characters we're interested in... StringBuilder sb = new StringBuilder(new String(bbuffer, "UTF-8")); String metaContentType = TagMagix.getTagAttrWhere(sb, "META", "content", "http-equiv", "Content-Type"); if (metaContentType != null) { charsetName = contentTypeToCharset(metaContentType); // override if (charsetName != null) { String upped = charsetName.toUpperCase(); if (upped.startsWith("UTF-16")) charsetName = "UTF-8"; } } return charsetName; } catch (IOException ex) { // TODO: log at FINE. return null; } } }
/** * Read first {@code sniffLength} bytes of {@code resource}'s payload, * decoding {@code Content-Encoding} if any. Reset {@code resource}'s * read position back to zero. * @param resource Resource to load bytes from * @return bytes, zero-padded if payload is shorter. * @throws IOException */ protected byte[] peekContent(Resource resource) throws IOException { byte[] bbuffer = new byte[Math.max(sniffLength, MINIMUM_SNIFF_BUFFER_SIZE)]; String encoding = resource.getHeader("content-encoding"); if ("gzip".equalsIgnoreCase(encoding) || "x-gzip".equalsIgnoreCase(encoding)) { // use larger readlimit, because gzip-ed data can be larger than the original // at low compression level. resource.mark(sniffLength + 100); @SuppressWarnings("resource") Resource z = new GzipDecodingResource(resource); z.read(bbuffer, 0, sniffLength); resource.reset(); } else { resource.mark(sniffLength); resource.read(bbuffer, 0, sniffLength); resource.reset(); } return bbuffer; }
public void testContentSniffing_JavaScript_compressed() throws Exception { Resource resource = createTestResource("text/html", getTestContent("js/1.js"), true); String mimetype = cut.sniff(resource); assertEquals("text/javascript", mimetype); // resource's payload stream must be positioned at the beginning, // which is confirmed by testing if the first two bytes are GZIP MAGIC. byte[] bytes = new byte[2]; resource.read(bytes); assertTrue("resource is properly reset to position 0", bytes[0] == (byte)0x1f && bytes[1] == (byte)0x8b); }
@Override public String sniff(Resource resource) { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; // (1) UniversalDetector detector = new UniversalDetector(null); // (2) resource.mark(MAX_CHARSET_READAHEAD); try { int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); detector.handleData(bbuffer, 0, len); } catch (IOException ex) { // } // (3) detector.dataEnd(); // (4) charsetName = detector.getDetectedCharset(); // (5) detector.reset(); if (isCharsetSupported(charsetName)) { return charsetName; } return null; } }
@Override public String sniff(Resource resource) { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; // (1) UniversalDetector detector = new UniversalDetector(null); // (2) resource.mark(MAX_CHARSET_READAHEAD); try { int len = resource.read(bbuffer, 0, MAX_CHARSET_READAHEAD); resource.reset(); detector.handleData(bbuffer, 0, len); } catch (IOException ex) { // } // (3) detector.dataEnd(); // (4) charsetName = detector.getDetectedCharset(); // (5) detector.reset(); if (isCharsetSupported(charsetName)) { return charsetName; } return null; } }
/**
 * Uncompressed, but chunked-encoded HTTP response: headers must parse to status 200
 * and {@code text/plain}, and one read into a buffer one byte larger than the payload
 * must return exactly the payload length.
 * @throws Exception
 */
public void testPlainChunkedHttpRecord() throws Exception {
    final String payload = "hogehogehogehogehoge";
    WARCRecordInfo recinfo = new TestWARCRecordInfo(
            TestWARCRecordInfo.buildHttpResponseBlock(
                    "200 OK", "text/plain", payload.getBytes("UTF-8"), true));
    Resource res = createResource(recinfo);
    res.parseHeaders();

    assertEquals("statusCode", 200, res.getStatusCode());
    assertEquals("content-type", "text/plain", res.getHeader("Content-Type"));

    // One spare byte in the buffer: a read that returned more than the payload would show.
    final int payloadLength = payload.getBytes().length;
    final byte[] buf = new byte[payloadLength + 1];
    assertEquals("content length", payloadLength, res.read(buf));

    res.close();
}
/** * plain HTTP response (without any transfer/content-encoding) * @throws Exception */ public void testPlainHttpRecord() throws Exception { String payload = "hogehogehogehogehoge"; WARCRecordInfo recinfo = TestWARCRecordInfo.createHttpResponse(payload); Resource res = createResource(recinfo); res.parseHeaders(); assertEquals("statusCode", 200, res.getStatusCode()); assertEquals("content-type", "text/plain", res.getHeader("Content-Type")); byte[] buf = new byte[payload.getBytes().length + 1]; int n = res.read(buf); assertEquals("content length", buf.length - 1, n); res.close(); } /**
/**
 * Gzip-compressed HTTP response: after header parsing,
 * {@code TextReplayRenderer.decodeResource} must wrap the resource in a
 * {@code GzipDecodingResource}, and one read from the wrapper must return exactly the
 * uncompressed payload length.
 * @throws Exception
 */
public void testCompressedHttpRecord() throws Exception {
    final String payload = "hogehogehogehogehoge";
    final String ctype = "text/plain";
    WARCRecordInfo recinfo = new TestWARCRecordInfo(
            TestWARCRecordInfo.buildCompressedHttpResponseBlock(ctype, payload.getBytes()));
    Resource res = createResource(recinfo);
    res.parseHeaders();

    assertEquals("statusCode", 200, res.getStatusCode());
    assertEquals("content-type", ctype, res.getHeader("Content-Type"));

    Resource zres = TextReplayRenderer.decodeResource(res);
    assertTrue("wrapped with GzipDecodingResource", zres instanceof GzipDecodingResource);

    // One spare byte in the buffer: a read that returned more than the payload would show.
    final int payloadLength = payload.getBytes().length;
    final byte[] buf = new byte[payloadLength + 1];
    assertEquals("content length", payloadLength, zres.read(buf));

    res.close();
}
/**
 * Gzip-compressed, chunked-encoded HTTP response: after header parsing,
 * {@code TextReplayRenderer.decodeResource} must wrap the resource in a
 * {@code GzipDecodingResource}, and one read from the wrapper must return exactly the
 * uncompressed payload length.
 * @throws Exception
 */
public void testCompressedChunkedHttpRecord() throws Exception {
    final String payload = "hogehogehogehogehoge";
    final String ctype = "text/plain";
    WARCRecordInfo recinfo = new TestWARCRecordInfo(
            TestWARCRecordInfo.buildCompressedHttpResponseBlock(ctype, payload.getBytes(), true));
    Resource res = createResource(recinfo);
    res.parseHeaders();

    assertEquals("statusCode", 200, res.getStatusCode());
    assertEquals("content-type", ctype, res.getHeader("Content-Type"));

    Resource zres = TextReplayRenderer.decodeResource(res);
    assertTrue("wrapped with GzipDecodingResource", zres instanceof GzipDecodingResource);

    // One spare byte in the buffer: a read that returned more than the payload would show.
    final int payloadLength = payload.getBytes().length;
    final byte[] buf = new byte[payloadLength + 1];
    assertEquals("content length", payloadLength, zres.read(buf));

    res.close();
}
/**
 * Gzip-compressed HTTP response served from an ARC record: after header parsing,
 * {@code TextReplayRenderer.decodeResource} must wrap the {@code ArcResource} in a
 * {@code GzipDecodingResource}, and one read from the wrapper must return exactly the
 * uncompressed payload length.
 * @throws Exception
 */
public void testCompressedHttpRecord() throws Exception {
    final String payload = "hogehogehogehogehoge";
    final String ctype = "text/plain";
    WARCRecordInfo recinfo = new TestWARCRecordInfo(
            TestWARCRecordInfo.buildCompressedHttpResponseBlock(ctype, payload.getBytes()));
    recinfo.setMimetype(ctype);

    TestARCReader ar = new TestARCReader(recinfo);
    ArcResource res = new ArcResource(ar.get(0), ar);
    res.parseHeaders();

    assertEquals("statusCode", 200, res.getStatusCode());
    assertEquals("content-type", ctype, res.getHeader("Content-Type"));

    Resource zres = TextReplayRenderer.decodeResource(res);
    assertTrue("wrapped with GzipDecodingResource", zres instanceof GzipDecodingResource);

    // One spare byte in the buffer: a read that returned more than the payload would show.
    final int payloadLength = payload.getBytes().length;
    final byte[] buf = new byte[payloadLength + 1];
    assertEquals("content length", payloadLength, zres.read(buf));

    res.close();
}
/**
 * Gzip-compressed and chunk-encoded HTTP response served from an ARC record: after
 * header parsing, {@code TextReplayRenderer.decodeResource} must wrap the
 * {@code ArcResource} in a {@code GzipDecodingResource}, and one read from the wrapper
 * must return exactly the uncompressed payload length.
 * @throws Exception
 */
public void testCompressedChunkedHttpRecord() throws Exception {
    final String payload = "hogehogehogehogehoge";
    final String ctype = "text/plain";
    WARCRecordInfo recinfo = new TestWARCRecordInfo(
            TestWARCRecordInfo.buildCompressedHttpResponseBlock(ctype, payload.getBytes(), true));
    recinfo.setMimetype(ctype);

    TestARCReader ar = new TestARCReader(recinfo);
    ArcResource res = new ArcResource(ar.get(0), ar);
    res.parseHeaders();

    assertEquals("statusCode", 200, res.getStatusCode());
    assertEquals("content-type", ctype, res.getHeader("Content-Type"));

    Resource zres = TextReplayRenderer.decodeResource(res);
    assertTrue("wrapped with GzipDecodingResource", zres instanceof GzipDecodingResource);

    // One spare byte in the buffer: a read that returned more than the payload would show.
    final int payloadLength = payload.getBytes().length;
    final byte[] buf = new byte[payloadLength + 1];
    assertEquals("content length", payloadLength, zres.read(buf));

    res.close();
}