/** * Converts (X)HTML response to DOM object Tree. * This version cares of charset of response. * @param unicodeData * @return the parsed document * */ private Document parseResponse(String unicodeData) throws IOException, ParserConfigurationException,SAXException,TidyException { //TODO: validate contentType for reasonable types? // NOTE: responseData encoding is server specific // Therefore we do byte -> unicode -> byte conversion // to ensure UTF-8 encoding as required by XPathUtil // convert unicode String -> UTF-8 bytes byte[] utf8data = unicodeData.getBytes(StandardCharsets.UTF_8); ByteArrayInputStream in = new ByteArrayInputStream(utf8data); boolean isXML = JOrphanUtils.isXML(utf8data); // this method assumes UTF-8 input data return XPathUtil.makeDocument(in,false,false,useNameSpace(),isTolerant(),isQuiet(),showWarnings(),reportErrors() ,isXML, isDownloadDTDs()); }
/** * Converts (X)HTML response to DOM object Tree. * This version cares of charset of response. * @param unicodeData * @param extractor * @return Document * */ private Document parseResponse(String unicodeData, XPathExtractor extractor) throws IOException, ParserConfigurationException,SAXException,TidyException { //TODO: validate contentType for reasonable types? // NOTE: responseData encoding is server specific // Therefore we do byte -> unicode -> byte conversion // to ensure UTF-8 encoding as required by XPathUtil // convert unicode String -> UTF-8 bytes byte[] utf8data = unicodeData.getBytes(StandardCharsets.UTF_8); ByteArrayInputStream in = new ByteArrayInputStream(utf8data); boolean isXML = JOrphanUtils.isXML(utf8data); // this method assumes UTF-8 input data return XPathUtil.makeDocument(in,false,false,extractor.useNameSpace(), extractor.isTolerant(),extractor.isQuiet(),extractor.showWarnings(), extractor.reportErrors(),isXML, extractor.isDownloadDTDs()); }
isWhitespace(), isTolerant()); boolean isXML = JOrphanUtils.isXML(responseData);