/**
 * Fetch the node factory currently in use.
 * The factory is not held by this object; it is obtained from the
 * lexer returned by <code>getLexer()</code>.
 * @return The node factory of the current lexer.
 */
public NodeFactory getNodeFactory()
{
    // delegate to the lexer, which holds the factory
    final NodeFactory factory = getLexer().getNodeFactory();
    return factory;
}
/**
 * Install a new node factory.
 * The factory is handed to the lexer returned by <code>getLexer()</code>.
 * @param factory The node factory the current lexer should use from now on.
 * @exception IllegalArgumentException if <code>factory</code> is <code>null</code>.
 */
public void setNodeFactory(NodeFactory factory)
{
    // reject null up front so the lexer never receives a null factory from here
    if (factory == null)
    {
        throw new IllegalArgumentException("node factory cannot be null");
    }
    getLexer().setNodeFactory(factory);
}
/**
 * Install a new node factory.
 * The factory is handed to the lexer returned by <code>getLexer()</code>.
 * @param factory The node factory the current lexer should use from now on.
 * @exception IllegalArgumentException if <code>factory</code> is <code>null</code>.
 * @see #getNodeFactory
 */
public void setNodeFactory(NodeFactory factory)
{
    // reject null up front so the lexer never receives a null factory from here
    if (factory == null)
    {
        throw new IllegalArgumentException("node factory cannot be null");
    }
    getLexer().setNodeFactory(factory);
}
/**
 * Fetch the node factory currently in use.
 * The factory is not held by this object; it is obtained from the
 * lexer returned by <code>getLexer()</code>.
 * @return The node factory of the current lexer.
 * @see #setNodeFactory
 */
public NodeFactory getNodeFactory()
{
    // delegate to the lexer, which holds the factory
    final NodeFactory factory = getLexer().getNodeFactory();
    return factory;
}
/**
 * Rewind the parser so that parsing starts over from the beginning.
 * The work is delegated to the lexer's <code>reset()</code>, which in turn
 * relies on the underlying {@link org.htmlparser.lexer.Source} object
 * supporting a reset.
 * <p>Compared with re-assigning the URL, i.e.
 * <pre>
 * parser.setURL (parser.getURL ());
 * </pre>
 * this takes less time because the page is not fetched from the
 * internet again.
 * <em>Note: a second parse produces new nodes, not the node objects
 * handed out by the first parse. To re-use the same nodes, gather them
 * in a NodeList with {@link #parse(NodeFilter) parse(null)} and work
 * on that NodeList instead.</em>
 */
public void reset()
{
    // the lexer performs the actual rewind
    getLexer().reset();
}
/**
 * Rewind the parser so that parsing starts over from the beginning.
 * The work is delegated to the lexer's <code>reset()</code>, which in turn
 * relies on the underlying {@link org.htmlparser.lexer.Source} object
 * supporting a reset.
 * <p>Compared with re-assigning the URL, i.e.
 * <pre>
 * parser.setURL (parser.getURL ());
 * </pre>
 * this takes less time because the page is not fetched from the
 * internet again.
 * <em>Note: a second parse produces new nodes, not the node objects
 * handed out by the first parse. To re-use the same nodes, gather them
 * in a NodeList with {@link #parse(NodeFilter) parse(null)} and work
 * on that NodeList instead.</em>
 */
public void reset()
{
    // the lexer performs the actual rewind
    getLexer().reset();
}
/**
 * Report the URL of the page currently being parsed.
 * The value is read from the page of the current lexer.
 * @return The URL of the current page. It can differ from the string
 * given to the constructor or to setURL; for example, a file name may
 * have been converted into a URL.
 * @see Page#getUrl
 * @see #setURL
 */
public String getURL()
{
    // lexer -> page -> url
    final String url = getLexer().getPage().getUrl();
    return url;
}
/**
 * Report the connection currently in use.
 * The value is read from the page of the current lexer.
 * @return The connection, which was either created by the parser or
 * supplied to it through {@link #setConnection}.
 * @see #setConnection(URLConnection)
 */
public URLConnection getConnection()
{
    // lexer -> page -> connection
    final URLConnection connection = getLexer().getPage().getConnection();
    return connection;
}
/**
 * Report the character encoding of the page being read.
 * The encoding initially comes from the HTTP header, but meta tags in
 * the head may override it, so the answer can change once the head
 * has been parsed.
 * @return The encoding that is in force at the moment.
 */
public String getEncoding()
{
    // lexer -> page -> encoding
    final String encoding = getLexer().getPage().getEncoding();
    return encoding;
}
/**
 * Report the connection currently in use.
 * The value is read from the page of the current lexer.
 * @return The connection, which was either created by the parser or
 * supplied to it through {@link #setConnection}.
 * @see #setConnection(URLConnection)
 */
public URLConnection getConnection()
{
    // lexer -> page -> connection
    final URLConnection connection = getLexer().getPage().getConnection();
    return connection;
}
/**
 * Report the character encoding of the page being read.
 * The encoding initially comes from the HTTP header, but meta tags in
 * the head may override it, so the answer can change once the head
 * has been parsed.
 * @return The encoding that is in force at the moment.
 * @see #setEncoding
 */
public String getEncoding()
{
    // lexer -> page -> encoding
    final String encoding = getLexer().getPage().getEncoding();
    return encoding;
}
/**
 * Report the URL of the page currently being parsed.
 * The value is read from the page of the current lexer.
 * @return The URL of the current page. It can differ from the string
 * given to the constructor or to setURL; for example, a file name may
 * have been converted into a URL.
 * @see Page#getUrl
 */
public String getURL()
{
    // lexer -> page -> url
    final String url = getLexer().getPage().getUrl();
    return url;
}
/**
 * Change the character encoding of the page being read.
 * The request is passed to the page of the current lexer.
 * @param encoding The character set to switch to.
 * @throws ParserException If characters that were already consumed
 * would have been different had the new encoding been in force from
 * the start.
 * @see org.htmlparser.util.EncodingChangeException
 */
public void setEncoding(String encoding)
    throws
        ParserException
{
    // lexer -> page; the page applies (or rejects) the new encoding
    getLexer().getPage().setEncoding(encoding);
}
/**
 * Change the character encoding of the page being read.
 * The request is passed to the page of the current lexer.
 * @param encoding The character set to switch to.
 * @throws ParserException If characters that were already consumed
 * would have been different had the new encoding been in force from
 * the start.
 * @see org.htmlparser.util.EncodingChangeException
 * @see #getEncoding
 */
public void setEncoding(String encoding)
    throws
        ParserException
{
    // lexer -> page; the page applies (or rejects) the new encoding
    getLexer().getPage().setEncoding(encoding);
}
public void doSemanticAction () throws ParserException { String image; // get the image url image = getImageURL (); // check if it needs to be captured if (isToBeCaptured (image)) { // add the image to the list needing to be copied if (!mCopied.contains (image)) if (!mImages.contains (image)) mImages.add (image); if (getCaptureResources ()) image = makeLocalLink (image, mParser.getLexer ().getPage ().getUrl ()); // alter the link setImageURL (image); } } }
lexer = mParser.getLexer (); return (lexer.getPage ().row (lexer.getCursor ()));
lexer = mParser.getLexer (); return (lexer.getPage ().column (lexer.getCursor ()));
lexer = mParser.getLexer (); return (lexer.getPage ().column (lexer.getCursor ()));
lexer = mParser.getLexer (); return (lexer.getPage ().row (lexer.getCursor ()));
/** * Set the lexer for this parser. * The current NodeFactory is transferred to (set on) the given lexer, * since the lexer owns the node factory object. * It does not adjust the <code>feedback</code> object. * Trying to set the lexer to <code>null</code> is a no-op. * @param lexer The lexer object to use. * @see #setNodeFactory */ public void setLexer (Lexer lexer) { NodeFactory factory; String type; if (null != lexer) { // move a node factory that's been set to the new lexer factory = null; if (null != getLexer ()) factory = getLexer ().getNodeFactory (); if (null != factory) lexer.setNodeFactory (factory); mLexer = lexer; // warn about content that's not likely text type = mLexer.getPage ().getContentType (); if (type != null && !type.startsWith ("text")) getFeedback ().warning ( "URL " + mLexer.getPage ().getUrl () + " does not contain text"); } }