/**
 * Point this parser at a new connection.
 * A fresh <code>Lexer</code> is built on the connection and then installed
 * through {@link #setLexer}.
 * @param connection A fully conditioned connection. The connect()
 * method will be called so it need not be connected yet.
 * Must not be <code>null</code>.
 * @exception ParserException if the character set specified in the
 * HTTP header is not supported, or an i/o exception occurs creating the
 * lexer.
 * @exception IllegalArgumentException if <code>connection</code> is
 * <code>null</code>.
 * @see #setLexer
 */
public void setConnection (URLConnection connection)
    throws
        ParserException
{
    // reject a missing connection before any lexer is created
    if (connection == null)
    {
        throw new IllegalArgumentException ("connection cannot be null");
    }

    final Lexer lexer = new Lexer (connection);
    setLexer (lexer);
}
/**
 * Replace the source of this parser with the given connection.
 * The connection is wrapped in a newly created <code>Lexer</code>, which is
 * then handed to {@link #setLexer}.
 * @param connection A fully conditioned connection. The connect()
 * method will be called so it need not be connected yet.
 * @exception IllegalArgumentException if <code>connection</code> is
 * <code>null</code>.
 * @exception ParserException if the character set specified in the
 * HTTP header is not supported, an i/o exception occurs creating the
 * lexer, or a problem occurs in connecting.
 * @see #setLexer
 * @see #getConnection
 */
public void setConnection (URLConnection connection)
    throws
        ParserException
{
    if (connection == null)
    {
        // fail fast: no lexer is built for a missing connection
        throw new IllegalArgumentException ("connection cannot be null");
    }

    final Lexer source = new Lexer (connection);
    setLexer (source);
}
/**
 * Initializes the parser with the given input HTML String.
 * An empty string is accepted but ignored: in that case
 * <code>setLexer</code> is not called.
 * @param inputHTML the input HTML that is to be parsed.
 * Must not be <code>null</code>.
 * @throws ParserException If an error occurs in setting up the
 * underlying Lexer.
 * @throws IllegalArgumentException if <code>inputHTML</code> is
 * <code>null</code>.
 */
public void setInputHTML (String inputHTML)
    throws
        ParserException
{
    if (inputHTML == null)
    {
        throw new IllegalArgumentException ("html cannot be null");
    }
    if (inputHTML.length () == 0)
    {
        // nothing to parse, so no new lexer is installed
        return;
    }

    setLexer (new Lexer (new Page (inputHTML)));
}
/**
 * Feed this parser from an in-memory HTML string.
 * A non-empty string is wrapped in a <code>Page</code> and a
 * <code>Lexer</code>, and that lexer is passed to <code>setLexer</code>.
 * For an empty string <code>setLexer</code> is not called.
 * @param inputHTML the input HTML that is to be parsed.
 * @throws ParserException If an error occurs in setting up the
 * underlying Lexer.
 * @exception IllegalArgumentException if <code>inputHTML</code> is
 * <code>null</code>.
 */
public void setInputHTML (String inputHTML)
    throws
        ParserException
{
    if (inputHTML == null)
    {
        throw new IllegalArgumentException ("html cannot be null");
    }

    final boolean hasContent = (inputHTML.length () != 0);
    if (hasContent)
    {
        final Page page = new Page (inputHTML);
        setLexer (new Lexer (page));
    }
}
/** * Construct a parser using the provided lexer and feedback object. * This would be used to create a parser for special cases where the * normal creation of a lexer on a URLConnection needs to be customized. * @param lexer The lexer to draw characters from. * @param fb The object to use when information, * warning and error messages are produced. If <em>null</em> no feedback * is provided. */ public Parser (Lexer lexer, ParserFeedback fb) { setFeedback (fb); if (null == lexer) throw new IllegalArgumentException ("lexer cannot be null"); setLexer (lexer); setNodeFactory (new PrototypicalNodeFactory ()); }
/** * Construct a parser using the provided lexer and feedback object. * This would be used to create a parser for special cases where the * normal creation of a lexer on a URLConnection needs to be customized. * @param lexer The lexer to draw characters from. * @param fb The object to use when information, * warning and error messages are produced. If <em>null</em> no feedback * is provided. */ public Parser (Lexer lexer, ParserFeedback fb) { setFeedback (fb); setLexer (lexer); setNodeFactory (new PrototypicalNodeFactory ()); }
/**
 * Build a Parser that reads directly from a String, rather than from a url
 * or a string representing the url location.
 * <BR>The string will be parsed as it would be a file.
 * @param input The string in input.
 * @return The Parser Object with the string as input stream.
 * @throws ParserException declared by the signature; presumably raised by
 * the parser/lexer set-up calls.
 * @throws UnsupportedEncodingException declared by the signature;
 * presumably raised while creating the page.
 */
public static Parser createParserParsingAnInputString (String input)
    throws ParserException, UnsupportedEncodingException
{
    final Parser result = new Parser();
    final Lexer source = new Lexer();

    // wrap the string in a page, attach it to the lexer, then wire up the parser
    source.setPage(new Page(input));
    result.setLexer(source);

    return result;
}
/**
 * Factory for a Parser whose input is the given String itself (not a url
 * and not a string naming a url location).
 * <BR>The string will be parsed as it would be a file.
 * @param input The string in input.
 * @return The Parser Object with the string as input stream.
 * @throws ParserException declared by the signature; presumably raised by
 * the parser/lexer set-up calls.
 * @throws UnsupportedEncodingException declared by the signature;
 * presumably raised while creating the page.
 */
public static Parser createParserParsingAnInputString (String input)
    throws ParserException, UnsupportedEncodingException
{
    final Parser stringParser = new Parser();
    final Lexer stringLexer = new Lexer();

    // the page holds the text; the lexer reads the page; the parser uses the lexer
    stringLexer.setPage(new Page(input));
    stringParser.setLexer(stringLexer);

    return stringParser;
}
parser.setLexer(lexer); if(CMSUtil.autoCloseHtmlTag())
/** * @see org.opencms.util.I_CmsHtmlNodeVisitor#process(java.lang.String, java.lang.String) */ public String process(String html, String encoding) throws ParserException { m_result = new StringBuffer(); Parser parser = new Parser(); Lexer lexer = new Lexer(); // initialize the page with the given char set Page page = new Page(html, encoding); lexer.setPage(page); parser.setLexer(lexer); if ((m_noAutoCloseTags != null) && (m_noAutoCloseTags.size() > 0)) { // Degrade Composite tags that do have children in the DOM tree // to simple single tags: This allows to finish this tag with opened HTML tags without the effect // that html parser will generate the closing tags. PrototypicalNodeFactory factory = configureNoAutoCorrectionTags(); lexer.setNodeFactory(factory); } // process the page using the given visitor parser.visitAllNodesWith(this); // return the result return getResult(); }
/** * @see org.opencms.util.I_CmsHtmlNodeVisitor#process(java.lang.String, java.lang.String) */ public String process(String html, String encoding) throws ParserException { m_result = new StringBuffer(); Parser parser = new Parser(); Lexer lexer = new Lexer(); // initialize the page with the given char set Page page = new Page(html, encoding); lexer.setPage(page); parser.setLexer(lexer); if (m_noAutoCloseTags != null && m_noAutoCloseTags.size() > 0) { // Degrade Composite tags that do have children in the DOM tree // to simple single tags: This allows to finish this tag with opened HTML tags without the effect // that html parser will generate the closing tags. PrototypicalNodeFactory factory = configureNoAutoCorrectionTags(); lexer.setNodeFactory(factory); } // process the page using the given visitor parser.visitAllNodesWith(this); // return the result return getResult(); }
// Create a page from the HTML with the given encoding, attach it to the lexer, then hand the lexer to the parser.
Page page = new Page(html, encoding); lexer.setPage(page); parser.setLexer(lexer);
/**
 * Returns the text content of a HTML page read from a stream.<p>
 *
 * The page is visited with a <code>StringBean</code>; if the bean yields <code>null</code>,
 * the empty String is returned instead.<p>
 *
 * @param in the html content input stream
 * @param encoding the encoding of the content
 *
 * @return the extracted text from the page, never <code>null</code>
 * @throws ParserException if the parsing of the HTML failed
 * @throws UnsupportedEncodingException if the given encoding is not supported
 */
public static String extractText(InputStream in, String encoding)
throws ParserException, UnsupportedEncodingException {

    Parser htmlParser = new Parser();
    Lexer htmlLexer = new Lexer();
    htmlLexer.setPage(new Page(in, encoding));
    htmlParser.setLexer(htmlLexer);

    // the bean collects the text while all nodes are visited
    StringBean textCollector = new StringBean();
    htmlParser.visitAllNodesWith(textCollector);

    String text = textCollector.getStrings();
    if (text == null) {
        return "";
    }
    return text;
}
// Hand the lexer to the parser.
parser.setLexer(lexer);
/**
 * Extract the text from a HTML page.<p>
 *
 * The page is visited with a <code>StringBean</code>. If the bean yields <code>null</code>,
 * the empty String is returned, so callers never have to deal with a <code>null</code> result.<p>
 *
 * @param in the html content input stream
 * @param encoding the encoding of the content
 *
 * @return the extracted text from the page, never <code>null</code>
 * @throws ParserException if the parsing of the HTML failed
 * @throws UnsupportedEncodingException if the given encoding is not supported
 */
public static String extractText(InputStream in, String encoding)
throws ParserException, UnsupportedEncodingException {

    Parser parser = new Parser();
    Lexer lexer = new Lexer();
    Page page = new Page(in, encoding);
    lexer.setPage(page);
    parser.setLexer(lexer);

    // the bean collects the text while all nodes are visited
    StringBean stringBean = new StringBean();
    parser.visitAllNodesWith(stringBean);

    // map a null result to "" instead of handing null to the caller
    String result = stringBean.getStrings();
    return result == null ? "" : result;
}
/**
 * Reads a HTML page from the given stream and returns its text.<p>
 *
 * All nodes are visited with a <code>StringBean</code>; a <code>null</code> result from
 * the bean is replaced by the empty String.<p>
 *
 * @param in the html content input stream
 * @param encoding the encoding of the content
 *
 * @return the extracted text from the page, never <code>null</code>
 * @throws ParserException if the parsing of the HTML failed
 * @throws UnsupportedEncodingException if the given encoding is not supported
 */
public static String extractText(InputStream in, String encoding)
throws ParserException, UnsupportedEncodingException {

    Parser pageParser = new Parser();
    Lexer pageLexer = new Lexer();
    pageLexer.setPage(new Page(in, encoding));
    pageParser.setLexer(pageLexer);

    StringBean extractor = new StringBean();
    pageParser.visitAllNodesWith(extractor);

    String extracted = extractor.getStrings();
    if (extracted != null) {
        return extracted;
    }
    // no text was produced: fall back to the empty String
    return "";
}
// Create a page from the content, attach it to the lexer, then hand the lexer to the parser.
Page page = new Page(content); lexer.setPage(page); parser.setLexer(lexer);
// Wrap the content in a page, set that page on the lexer, and install the lexer on the parser.
Page page = new Page(content); lexer.setPage(page); parser.setLexer(lexer);