/**
 * Extracts the article from an HTML document, keeping its basic HTML structure.
 *
 * <p>The document is parsed into a {@code TextDocument}, run through the given
 * extractor, and then rendered by an extracting {@code HTMLHighlighter} configured
 * with {@code setOutputHighlightOnly(true)}. The rendered markup is finally passed
 * through {@code removeNotAllowedTags} together with {@code docUri}.
 *
 * <p>{@code htmlDoc.toInputSource()} is called twice: once for parsing and once
 * for highlighting.
 *
 * @param htmlDoc   the HTML document to extract the article from
 * @param docUri    the URI of the document; forwarded to {@code removeNotAllowedTags}
 *                  (per the original documentation, used to resolve relative anchors
 *                  in the document to absolute anchors)
 * @param extractor the boilerpipe extractor applied to the parsed document
 * @return the result of {@code removeNotAllowedTags} on the highlighted markup, or
 *         {@code null} if parsing, extraction or highlighting throws any exception
 */
public String process(HTMLDocument htmlDoc, URI docUri, final BoilerpipeExtractor extractor) {
    final HTMLHighlighter highlighter = HTMLHighlighter.newExtractingInstance();
    highlighter.setOutputHighlightOnly(true);

    final String highlighted;
    try {
        final TextDocument textDoc =
                new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
        extractor.process(textDoc);
        // A second input source is requested for the highlighting pass.
        highlighted = highlighter.process(textDoc, htmlDoc.toInputSource());
    } catch (Exception ex) {
        // Best-effort contract: any failure in the pipeline is reported as null.
        return null;
    }

    // Deliberately outside the try block: failures here propagate to the caller.
    return removeNotAllowedTags(highlighted, docUri);
}