/**
 * Builds a {@link ParseStatus} from its serialized form.
 *
 * <p>A fresh instance is created with the no-argument constructor and then
 * populated by delegating to its {@code readFields(DataInput)} method.
 *
 * @param in the input to read the serialized status from
 * @return the newly created and populated status
 * @throws IOException if {@code readFields} fails while reading from {@code in}
 */
public static ParseStatus read(DataInput in) throws IOException {
  final ParseStatus status = new ParseStatus();
  status.readFields(in);
  return status;
}
System.err.println(e.getMessage()); log.error("Error parsing urlRegex: " + e.getMessage()); return new ParseStatus(ParseStatus.FAILED, "Error parsing urlRegex: " + e.getMessage()).getEmptyParseResult(content.getUrl(), configuration); } catch (ParserConfigurationException e) { System.err.println(e.getMessage()); log.error("HTML Cleaning error: " + e.getMessage()); return new ParseStatus(ParseStatus.FAILED, "HTML Cleaning error: " + e.getMessage()).getEmptyParseResult(content.getUrl(), configuration); } catch (SAXException e) { System.err.println(e.getMessage()); log.error("XML parsing error: " + e.getMessage()); return new ParseStatus(ParseStatus.FAILED, "XML parsing error: " + e.getMessage()).getEmptyParseResult(content.getUrl(), configuration); } catch (JaxenException e) { System.err.println(e.getMessage()); log.error("XPath error: " + e.getMessage()); return new ParseStatus(ParseStatus.FAILED, "XPath error: " + e.getMessage()).getEmptyParseResult(content.getUrl(), configuration);
StringUtil.toHexString(MD5Hash.digest(txtContent[0].getBytes()).getDigest())); ParseData data = new ParseData(new ParseStatus(ParseStatus.SUCCESS), txtContent[1], outlinks, contentMeta, new Metadata());
public final void write(DataOutput out) throws IOException { out.writeByte(VERSION); // write version status.write(out); // write status Text.writeString(out, title); // write title out.writeInt(outlinks.length); // write outlinks for (int i = 0; i < outlinks.length; i++) { outlinks[i].write(out); } contentMeta.write(out); // write content metadata parseMeta.write(out); }
public final void readFields(DataInput in) throws IOException { version = in.readByte(); // incompatible change from UTF8 (version < 5) to Text if (version != VERSION) throw new VersionMismatchException(VERSION, version); status = ParseStatus.read(in); title = Text.readString(in); // read title int numOutlinks = in.readInt(); outlinks = new Outlink[numOutlinks]; for (int i = 0; i < numOutlinks; i++) { outlinks[i] = Outlink.read(in); } if (version < 3) { int propertyCount = in.readInt(); // read metadata contentMeta.clear(); for (int i = 0; i < propertyCount; i++) { contentMeta.add(Text.readString(in), Text.readString(in)); } } else { contentMeta.clear(); contentMeta.readFields(in); } if (version > 3) { parseMeta.clear(); parseMeta.readFields(in); } }