/** * Print an error message. * @param message The message to print. * @param exception The exception for stack tracing. */ public void error (String message, ParserException exception) { if (QUIET != mMode) { System.out.println ("ERROR: " + message); if (DEBUG == mMode && (null != exception)) exception.printStackTrace (); } } }
System.err.println ("broken link " + ((FileNotFoundException)throwable).getMessage () + " ignored"); else pe.printStackTrace (); pe.printStackTrace ();
/**
 * Parses {@code content} as UTF-8 HTML and, when {@code hasFont} reports
 * true for the parsed node list, returns the markup produced by
 * {@code getNewHtml} with runs of two or three consecutive
 * {@code </FONT>} closing tags collapsed into one.
 *
 * <p>The static {@code hasData} flag is reset to {@code false} on every call.
 * If parsing fails, or {@code hasFont} reports false, the input is returned
 * unchanged.
 *
 * @param content the HTML text to process
 * @return the rewritten HTML, or {@code content} itself when nothing was rewritten
 */
public static String parseFontHTML(String content) {
    hasData = false;

    Parser htmlParser = Parser.createParser(content, "UTF-8");
    StringBuilder rewritten = null;
    try {
        NodeList parsedNodes = (NodeList) htmlParser.parse(null);
        boolean containsFont = hasFont(parsedNodes);
        if (containsFont) {
            rewritten = getNewHtml(parsedNodes);
        }
    } catch (ParserException parseFailure) {
        // Best effort: report the failure and fall through to return the input.
        parseFailure.printStackTrace();
    }

    if (rewritten == null) {
        return content;
    }

    // Collapse triple closers first, then any remaining doubles.
    String withoutTriples = rewritten.toString().replace("</FONT></FONT></FONT>", "</FONT>");
    return withoutTriples.replace("</FONT></FONT>", "</FONT>");
}
// Parse the comment text as UTF-8 HTML and collect every node matching the "a" tag name.
Parser parser = Parser.createParser(comment.getText(), "UTF-8");
NodeList htmlAnchorNodes;
try {
    htmlAnchorNodes = parser.extractAllNodesThatMatch(new TagNameFilter("a"));
} catch (ParserException e) {
    e.printStackTrace();
    // Fall back to an empty list: previously the variable stayed null here and
    // the size() call below threw a NullPointerException right after the
    // parse failure had been reported.
    htmlAnchorNodes = new NodeList();
}
// Number of matched nodes; 0 when extraction failed.
int size = htmlAnchorNodes.size();
/*
 * Usage example (elided with "..." — not compilable as written):
 * builds a Parser from the string held in 'html', parses it with a null
 * filter, and stores the value returned by NodeList.asString() in 'result'.
 * If a ParserException is thrown its stack trace is printed and 'result'
 * keeps its initial value of null.
 */
import org.htmlparser.Parser; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; ... String html; /* read your HTML into variable 'html' */ String result=null; .... try { Parser p = new Parser(html); NodeList nodes = p.parse(null); result = nodes.asString(); } catch (ParserException e) { e.printStackTrace(); }
// Number of nodes matching the "a" tag name in the comment text; 0 when extraction fails.
int size;
{
    // The braces keep the parser and the node list out of the enclosing scope.
    Parser parser = Parser.createParser(comment.getText(), "UTF-8");
    NodeList htmlAnchorNodes = null;
    try {
        htmlAnchorNodes = parser.extractAllNodesThatMatch(new TagNameFilter("a"));
    } catch (ParserException e) {
        e.printStackTrace();
    }
    // After a ParserException the list is still null; report zero matches
    // instead of dereferencing it and throwing a NullPointerException.
    size = (htmlAnchorNodes == null) ? 0 : htmlAnchorNodes.size();
}
public static List<String> getLinksOnPage(final String url) { final Parser htmlParser = new Parser(url); final List<String> result = new LinkedList<String>(); try { final NodeList tagNodeList = htmlParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class)); for (int j = 0; j < tagNodeList.size(); j++) { final LinkTag loopLink = (LinkTag) tagNodeList.elementAt(j); final String loopLinkStr = loopLink.getLink(); result.add(loopLinkStr); } } catch (ParserException e) { e.printStackTrace(); // TODO handle error } return result; }
public static List<String> getLinksOnPage(final String url) { final Parser htmlParser = new Parser(url); final List<String> result = new LinkedList<String>(); try { final NodeList tagNodeList = htmlParser.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class)); for (int j = 0; j < tagNodeList.size(); j++) { final LinkTag loopLink = (LinkTag) tagNodeList.elementAt(j); final String loopLinkStr = loopLink.getLink(); result.add(loopLinkStr); } } catch (ParserException e) { e.printStackTrace(); // TODO handle error } return result; }
public void annotateHTMLContent(InputStream is, String charSet, String fileContext, CaptureSearchResult result) { ParseContext context = new ParseContext(); Node node; try { ContextAwareLexer lex = new ContextAwareLexer( new Lexer(new Page(is,charSet)),context); while((node = lex.nextNode()) != null) { // System.err.println("\nDEBUG-Node:js("+context.isInJS()+")css("+context.isInCSS()+"):"); // System.err.println("-------------------/START"); // System.err.println(node.toHtml(true)); // System.err.println("-------------------/END"); rules.handleNode(context, node); } rules.handleParseComplete(context); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } catch (IOException e) { LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } } }
public void annotateHTMLContent(InputStream is, String charSet, String fileContext, CaptureSearchResult result) { ParseContext context = new ParseContext(); Node node; try { ContextAwareLexer lex = new ContextAwareLexer( new Lexer(new Page(is,charSet)),context); while((node = lex.nextNode()) != null) { // System.err.println("\nDEBUG-Node:js("+context.isInJS()+")css("+context.isInCSS()+"):"); // System.err.println("-------------------/START"); // System.err.println(node.toHtml(true)); // System.err.println("-------------------/END"); rules.handleNode(context, node); } rules.handleParseComplete(context); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } catch (IOException e) { LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } } }
throw new ResourceParseException(e); } catch (ParserException e) { e.printStackTrace(); throw new ResourceParseException(e); } catch(OutOfMemoryError e) {
public Resource getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) throws ResourceParseException, IOException { HTMLMetaData hmd = new HTMLMetaData(parentMetaData); ExtractingParseObserver epo = new ExtractingParseObserver(hmd); LexParser parser = new LexParser(epo); CDATALexer lex = new CDATALexer(); // TODO: figure out charset: String charset = "UTF-8"; Page page; try { page = new Page(is, charset); lex.setPage(page); parser.doParse(lex); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new ResourceParseException(e); } catch (ParserException e) { e.printStackTrace(); throw new ResourceParseException(e); } catch(OutOfMemoryError e) { throw new ResourceParseException(null); } return new HTMLResource(hmd,container); } }
} catch (ParserException e) { e.printStackTrace(); } catch (IOException e) {