/**
 * Adopts {@code o} as this object's record, but only when {@code o} is a response record.
 *
 * <p>The header record type of {@code o} is compared with {@code "response"} ignoring case. On a
 * match, {@code o} is handed to {@code this.warcRecord.set(...)}; for any other record type the
 * call silently does nothing.
 *
 * @param o the generic WARC record to adopt; neither {@code o} nor its header record type may be
 *     {@code null} (either case raises a {@code NullPointerException})
 */
public void setRecord(WarcRecord o) {
  String recordType = o.getHeaderRecordType();
  if (!recordType.equalsIgnoreCase("response")) {
    // Not a response record: leave the current state untouched.
    return;
  }
  this.warcRecord.set(o);
}
/**
 * Stores the given record in {@code this.warcRecord} if, and only if, it is a response record.
 *
 * <p>A record counts as a response record when its header record type equals {@code "response"}
 * ignoring case. Records of any other type are ignored without any error being reported.
 *
 * @param o the generic WARC record to store; a {@code null} record, or a record whose header
 *     record type is {@code null}, results in a {@code NullPointerException}
 */
public void setRecord(WarcRecord o) {
  final boolean isResponse = o.getHeaderRecordType().equalsIgnoreCase("response");
  if (isResponse) {
    this.warcRecord.set(o);
  }
}
/** * Constructor creation from a generic WARC record * * @param o */ public WarcHTMLResponseRecord(WarcRecord o) { if (o.getHeaderRecordType().compareToIgnoreCase("response") == 0) { this.warcRecord.set(o); } createPatternSet(); }
/**
 * Builds this object from a generic WARC record.
 *
 * <p>The supplied record is handed to {@code this.warcRecord.set(...)} only when its header
 * record type matches {@code "response"} case-insensitively; a record of any other type is
 * silently not adopted. After that check, {@code createPatternSet()} is always called.
 *
 * @param o the generic WARC record to build from; a {@code null} record, or a record whose
 *     header record type is {@code null}, results in a {@code NullPointerException}
 */
public WarcHTMLResponseRecord(WarcRecord o) {
  String recordType = o.getHeaderRecordType();
  if (recordType.equalsIgnoreCase("response")) {
    this.warcRecord.set(o);
  }

  createPatternSet();
}
public static void main(String[] args) throws IOException { // use a callback class for handling WARC record data: IProcessWarcRecord processor = new SampleProcessWarcRecord(); String inputWarcFile="CC-MAIN-20140305125104-00002-ip-10-183-142-35.ec2.internal.warc.gz"; GZIPInputStream gzInputStream=new GZIPInputStream(new FileInputStream(inputWarcFile)); DataInputStream inStream=new DataInputStream(gzInputStream); WarcRecord thisWarcRecord; while ((thisWarcRecord=WarcRecord.readNextWarcRecord(inStream))!=null) { System.out.println("%% thisWarcRecord.getHeaderRecordType() = " + thisWarcRecord.getHeaderRecordType()); if (thisWarcRecord.getHeaderRecordType().equals("response")) { WarcHTMLResponseRecord htmlRecord=new WarcHTMLResponseRecord(thisWarcRecord); String thisTargetURI=htmlRecord.getTargetURI(); String thisContentUtf8 = htmlRecord.getRawRecord().getContentUTF8(); // handle WARC record content: processor.process(thisTargetURI, thisContentUtf8); } } inStream.close(); // done processing all WARC records: processor.done(); } }
while ((thisWarcRecord = WarcRecord.readNextWarcRecord(inStream)) != null) { if (thisWarcRecord.getHeaderRecordType().equals("response")) { WarcHTMLResponseRecord htmlRecord = new WarcHTMLResponseRecord(thisWarcRecord); String thisTargetURI = htmlRecord.getTargetURI();
if (wr.getHeaderRecordType().equals("response") == false) return;