warcReader.setWarcTargetUriProfile(uriProfile); warcReader.setBlockDigestEnabled(bBlockDigestEnabled); warcReader.setPayloadDigestEnabled(bPayloadDigestEnabled); warcReader.setRecordHeaderMaxSize(recordHeaderMaxSize); warcReader.setPayloadHeaderMaxSize(payloadHeaderMaxSize); while ((warcRecord = warcReader.getNextRecord()) != null) { url = warcRecord.header.warcTargetUriStr; key = UrlUtils.urlToKey(url);
/**
 * Creates an iterator over the WARC records read from the given stream.
 *
 * <p>If the WARC reader cannot be obtained, the {@link IOException} is logged and echoed to
 * standard error, and {@code iterator} is left unassigned by this constructor.
 *
 * @param in stream handed both to the superclass and to the WARC reader factory
 */
WarcArchiveEntryIterator(InputStream in) {
    // dummy call - most of the superclass methods are overridden here
    super(in);
    try {
        this.iterator = WarcReaderFactory.getReader(in).iterator();
    } catch (IOException ioe) {
        log.error(ioe.getMessage(), ioe);
        System.err.println(ioe);
    }
}
/**
 * Applies this module's digest and URI-validation settings to a WARC reader.
 *
 * <p>Settings are applied in a fixed order: digest enable flags, digest algorithms, digest
 * encodings, then the WARC-Target-URI profile and the general URI profile. If an algorithm is
 * rejected, the options after it are not applied.
 *
 * @param reader WARC reader instance
 * @throws JhoveException if the reader does not accept the configured block or payload
 *         digest algorithm
 */
protected void setReaderOptions(WarcReader reader) throws JhoveException {
    reader.setBlockDigestEnabled(bComputeBlockDigest);
    reader.setPayloadDigestEnabled(bComputePayloadDigest);

    boolean blockAlgorithmAccepted = reader.setBlockDigestAlgorithm(blockDigestAlgorithm);
    if (!blockAlgorithmAccepted) {
        throw new JhoveException(MessageConstants.ERR_BLOCK_DIGEST_INVALID + blockDigestAlgorithm);
    }
    boolean payloadAlgorithmAccepted = reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm);
    if (!payloadAlgorithmAccepted) {
        throw new JhoveException(MessageConstants.ERR_PAYLOAD_DIGEST_INVALID + payloadDigestAlgorithm);
    }

    reader.setBlockDigestEncoding(blockDigestEncoding);
    reader.setPayloadDigestEncoding(payloadDigestEncoding);

    // Strict validation selects plain RFC3986; otherwise the lax 16-bit absolute profile is used.
    reader.setWarcTargetUriProfile(
            bStrictTargetUriValidation ? UriProfile.RFC3986 : UriProfile.RFC3986_ABS_16BIT_LAX);
    reader.setUriProfile(
            bStrictUriValidation ? UriProfile.RFC3986 : UriProfile.RFC3986_ABS_16BIT_LAX);
}
r.warcReader = WarcReaderFactory.getReaderUncompressed(); r.warcReader .setWarcTargetUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX); r.warcReader.setBlockDigestEnabled(false); r.warcReader.setPayloadDigestEnabled(false); r.warcRecord = r.warcReader.getNextRecordFrom(in, offset); if (r.warcRecord != null) { WARCRecordType rectype = getWARCRecordType(r.warcRecord);
/** * Skips InputStream to next record past <code>start</code> or not at all if start is * exactly the start of a record */ private void skipToNextRecord(long start) throws IOException { // Skip record by record until (position in input stream) >= start while (fsin.getCount() < start && warcReader.getNextRecord() != null) { } ; lastRecordEnd = fsin.getCount(); } }
/**
 * Parses the given stream as WARC and records the outcome in {@code info}.
 *
 * <p>The format, MIME type and module are set first; the reader is then configured, all
 * records are parsed, and validity and well-formedness are both set from
 * {@code isCompliant()}. The signature match is recorded only when the reader is compliant
 * after results have been reported. A {@link JhoveException} raised along the way is turned
 * into an error message on {@code info}, which is then marked neither valid nor well-formed.
 * The reader is closed in all cases.
 *
 * @param stream input to parse; wrapped in an {@code InputStreamNoSkip} before reading
 * @param info representation information that receives the results
 * @param parseIndex not used by this implementation
 * @return always 0
 * @throws IOException if an I/O error occurs while reading
 */
@Override
public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException {
    InputStreamNoSkip noSkipStream = new InputStreamNoSkip(stream);
    // 8192 is passed through to the factory (presumably a buffer size - confirm).
    WarcReader recordReader = WarcReaderFactory.getReader(noSkipStream, 8192);
    try {
        info.setFormat(_format[0]);
        info.setMimeType(_mimeType[0]);
        info.setModule(this);

        setReaderOptions(recordReader);
        parseRecords(recordReader);

        info.setValid(recordReader.isCompliant());
        info.setWellFormed(recordReader.isCompliant());
        reportResults(recordReader, info);
        if (recordReader.isCompliant()) {
            info.setSigMatch(_name);
        }
    } catch (JhoveException ex) {
        info.setMessage(new ErrorMessage(ex.getMessage()));
        info.setValid(false);
        info.setWellFormed(false);
    } finally {
        if (recordReader != null) {
            recordReader.close();
        }
    }
    return 0;
}
/**
 * Closes every record, reader and stream held by this object.
 *
 * <p>Non-null resources are closed in this order: {@code warcRecord}, {@code warcReader},
 * {@code arcRecord}, {@code arcReader}, {@code gzipEntry}, {@code gzipReader}, {@code pbin}.
 * The closes are chained through {@code finally} blocks so that a failure while closing one
 * resource does not prevent the remaining resources from being closed.
 *
 * @throws IOException if closing a resource fails; when several closes fail, the exception
 *         raised by the last failing close is the one that propagates
 */
@Override
public void close() throws IOException {
    try {
        if (warcRecord != null) {
            warcRecord.close();
        }
    } finally {
        try {
            if (warcReader != null) {
                warcReader.close();
            }
        } finally {
            try {
                if (arcRecord != null) {
                    arcRecord.close();
                }
            } finally {
                try {
                    if (arcReader != null) {
                        arcReader.close();
                    }
                } finally {
                    try {
                        if (gzipEntry != null) {
                            gzipEntry.close();
                        }
                    } finally {
                        try {
                            if (gzipReader != null) {
                                gzipReader.close();
                            }
                        } finally {
                            if (pbin != null) {
                                pbin.close();
                            }
                        }
                    }
                }
            }
        }
    }
}
// Store the value reported by reader.getConsumed() as the size on repInfo
// (presumably the amount of input consumed so far - confirm against the reader API).
repInfo.setSize(reader.getConsumed());
r.warcReader = WarcReaderFactory.getReaderUncompressed(); r.warcReader .setWarcTargetUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX); r.warcReader.setBlockDigestEnabled(false); r.warcReader.setPayloadDigestEnabled(false); r.warcRecord = r.warcReader.getNextRecordFrom(in, offset); if (r.warcRecord != null) { WARCRecordType rectype = getWARCRecordType(r.warcRecord);
/**
 * Reads every record from the WARC reader, processes it, and merges the record's diagnostics
 * into the reader's diagnostics. Parsing is straightforward because all records are
 * accessible through the same source.
 *
 * @param reader WARC reader used to parse records
 * @throws EOFException if EOF occurs prematurely
 * @throws IOException if an IO error occurs while processing
 * @throws JhoveException if {@code reader} is {@code null} or a serious problem needs to be
 *         reported
 */
protected void parseRecords(WarcReader reader) throws IOException, JhoveException {
    if (reader == null) {
        throw new JhoveException(MessageConstants.ERR_RECORD_NULL);
    }
    for (WarcRecord current = reader.getNextRecord();
            current != null;
            current = reader.getNextRecord()) {
        processRecord(current);
        reader.diagnostics.addAll(current.diagnostics);
    }
}
/**
 * Closes every record, reader and stream held by this object.
 *
 * <p>Non-null resources are closed in this order: {@code warcRecord}, {@code warcReader},
 * {@code arcRecord}, {@code arcReader}, {@code gzipEntry}, {@code gzipReader}, {@code pbin}.
 * The closes are chained through {@code finally} blocks so that a failure while closing one
 * resource does not prevent the remaining resources from being closed.
 *
 * @throws IOException if closing a resource fails; when several closes fail, the exception
 *         raised by the last failing close is the one that propagates
 */
@Override
public void close() throws IOException {
    try {
        if (warcRecord != null) {
            warcRecord.close();
        }
    } finally {
        try {
            if (warcReader != null) {
                warcReader.close();
            }
        } finally {
            try {
                if (arcRecord != null) {
                    arcRecord.close();
                }
            } finally {
                try {
                    if (arcReader != null) {
                        arcReader.close();
                    }
                } finally {
                    try {
                        if (gzipEntry != null) {
                            gzipEntry.close();
                        }
                    } finally {
                        try {
                            if (gzipReader != null) {
                                gzipReader.close();
                            }
                        } finally {
                            if (pbin != null) {
                                pbin.close();
                            }
                        }
                    }
                }
            }
        }
    }
}
long bufferMarkAtEnd = 0; while ((arcRecord = warcReader.getNextRecord()) != null) {
/**
 * Creates an iterator over the WARC records read from the given stream.
 *
 * <p>If the WARC reader cannot be obtained, the {@link IOException} is logged and echoed to
 * standard error, and {@code iterator} is left unassigned by this constructor.
 *
 * @param in stream handed both to the superclass and to the WARC reader factory
 */
WarcArchiveEntryIterator(InputStream in) {
    // dummy call - most of the superclass methods are overridden here
    super(in);
    try {
        this.iterator = WarcReaderFactory.getReader(in).iterator();
    } catch (IOException ioe) {
        LOGGER.error(ioe);
        System.err.println(ioe);
    }
}
InputStream warcIn = request.getSourceInputStream(); WarcReader warcReader = WarcReaderFactory.getReader(warcIn); Iterator<WarcRecord> iterator = warcReader.iterator(); try { WarcRecord record = null;