private boolean isBlockLevel(DomNode node) { // From the HTML spec (http://www.w3.org/TR/html401/sgml/dtd.html#block) // <!ENTITY % block "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS"> // <!ENTITY % heading "H1|H2|H3|H4|H5|H6"> // <!ENTITY % list "UL | OL"> // <!ENTITY % preformatted "PRE"> if (!(node instanceof HtmlElement)) return false; String tagName = ((HtmlElement) node).getTagName().toLowerCase(); for (String blockLevelsTagName : blockLevelsTagNames) { if (blockLevelsTagName.equals(tagName)) { return true; } } return false; }
private boolean isBlockLevel(DomNode node) { // From the HTML spec (http://www.w3.org/TR/html401/sgml/dtd.html#block) // <!ENTITY % block // "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS"> // <!ENTITY % heading "H1|H2|H3|H4|H5|H6"> // <!ENTITY % list "UL | OL"> // <!ENTITY % preformatted "PRE"> if (!(node instanceof HtmlElement)) { return false; } String tagName = ((HtmlElement) node).getTagName().toLowerCase(); for (String blockLevelsTagName : blockLevelsTagNames) { if (blockLevelsTagName.equals(tagName)) { return true; } } return false; }
/**
 * Returns the HTML elements that are descendants of this element and that have the specified tag name.
 *
 * <p>The requested name is lower-cased before it is compared with each element's tag name.
 *
 * @param tagName the tag name to match (case-insensitive); must not be {@code null}
 * @param <E> the sub-element type
 * @return the HTML elements that are descendants of this element and that have the specified tag name;
 *         an empty list if none match
 */
@SuppressWarnings("unchecked")
public final <E extends HtmlElement> List<E> getHtmlElementsByTagName(final String tagName) {
    final List<E> list = new ArrayList<E>();
    // Use a fixed locale so the result does not depend on the JVM default
    // (e.g. "INPUT" lower-cases to "ınput" under a Turkish locale).
    final String lowerCaseTagName = tagName.toLowerCase(java.util.Locale.ROOT);
    final Iterable<HtmlElement> iterable = getAllHtmlChildElements();
    for (final HtmlElement element : iterable) {
        if (lowerCaseTagName.equals(element.getTagName())) {
            // Unchecked by design: the caller chooses E to fit the requested tag.
            list.add((E) element);
        }
    }
    return list;
}
SimpleDateFormatter dateParser = ... List<DateAndTitle> result = new LinkedList(); Date lastDate = null; // for(HtmlElement node : table.getHtmlElementDescendants()) { for(HtmlElement node : findAllChildren(table)) { if( "ul".equals( node.getTagName() ) ) { String title = node.asText(); result.add(new DateAndTitle(lastDate, title); } if( "h3".equals( node.getTagName() ) ) { String dateString = node.asText(); lastDate = dateParser.parse(dateString); } }
/**
 * Returns the HTML elements that are descendants of this element and that have the specified tag name.
 *
 * <p>The requested name is lower-cased before it is compared with each element's tag name.
 *
 * @param tagName the tag name to match (case-insensitive); must not be {@code null}
 * @param <E> the sub-element type
 * @return the HTML elements that are descendants of this element and that have the specified tag name;
 *         an empty list if none match
 */
@SuppressWarnings("unchecked")
public final <E extends HtmlElement> List<E> getHtmlElementsByTagName(final String tagName) {
    final List<E> list = new ArrayList<E>();
    // Locale.ROOT avoids default-locale surprises such as the Turkish dotless "ı",
    // which would stop names containing "I" from matching.
    final String lowerCaseTagName = tagName.toLowerCase(java.util.Locale.ROOT);
    final Iterable<HtmlElement> iterable = getAllHtmlChildElements();
    for (final HtmlElement element : iterable) {
        if (lowerCaseTagName.equals(element.getTagName())) {
            // Unchecked by design: the caller chooses E to fit the requested tag.
            list.add((E) element);
        }
    }
    return list;
}
/**
 * Returns the HTML elements that are descendants of this element and that have the specified tag name.
 *
 * <p>The requested name is lower-cased before it is compared with each element's tag name.
 *
 * @param tagName the tag name to match (case-insensitive); must not be {@code null}
 * @param <E> the sub-element type
 * @return the HTML elements that are descendants of this element and that have the specified tag name;
 *         an empty list if none match
 */
@SuppressWarnings("unchecked")
public final <E extends HtmlElement> List<E> getHtmlElementsByTagName(final String tagName) {
    final List<E> list = new ArrayList<E>();
    // Lower-casing must not depend on the JVM's default locale; under a Turkish
    // locale "TITLE" would become "tıtle" and match nothing.
    final String lowerCaseTagName = tagName.toLowerCase(java.util.Locale.ROOT);
    final Iterable<HtmlElement> iterable = getAllHtmlChildElements();
    for (final HtmlElement element : iterable) {
        if (lowerCaseTagName.equals(element.getTagName())) {
            // Unchecked by design: the caller chooses E to fit the requested tag.
            list.add((E) element);
        }
    }
    return list;
}
@Override public String toString() { if (toString == null) { StringBuilder sb = new StringBuilder(); sb.append('<').append(element.getTagName()); NamedNodeMap attributes = element.getAttributes(); int n = attributes.getLength(); for (int i = 0; i < n; ++i) { Attr a = (Attr) attributes.item(i); sb.append(' ').append(a.getName()).append("=\"").append(a.getValue().replace("\"", """)).append("\""); } if (element.hasChildNodes()) { sb.append('>'); } else { sb.append(" />"); } toString = sb.toString(); } return toString; }
/**
 * Variant of {@link #getElementsByAttribute(String, String, String)} that walks
 * {@code getFormHtmlElementDescendants()} and therefore
 * ignores elements that are contained in a nested form.
 */
@SuppressWarnings("unchecked")
private <E extends HtmlElement> List<E> getFormElementsByAttribute(
        final String elementName,
        final String attributeName,
        final String attributeValue) {

    final List<E> matches = new ArrayList<>();
    final String wantedTag = elementName.toLowerCase(Locale.ROOT);

    for (final HtmlElement candidate : getFormHtmlElementDescendants()) {
        // Skip anything whose tag name differs from the requested one.
        if (!candidate.getTagName().equals(wantedTag)) {
            continue;
        }
        final String actualValue = candidate.getAttribute(attributeName);
        if (actualValue == null || !actualValue.equals(attributeValue)) {
            continue;
        }
        matches.add((E) candidate);
    }
    return matches;
}
/**
 * Always fails: this override rejects any attempt to replace the node and
 * reports a runtime error that names the node's tag.
 * @param value the new value for replacing this node (not used)
 */
@Override
public void setOuterHTML(final Object value) {
    final String tagName = getDomNodeOrDie().getTagName();
    final String message = "outerHTML is read-only for tag '" + tagName + "'";
    throw Context.reportRuntimeError(message);
}
/**
 * Variant of {@link #getElementsByAttribute(String, String, String)} that walks
 * {@code getFormHtmlElementDescendants()} and therefore
 * ignores elements that are contained in a nested form.
 */
@SuppressWarnings("unchecked")
private <E extends HtmlElement> List<E> getFormElementsByAttribute(
        final String elementName,
        final String attributeName,
        final String attributeValue) {

    final List<E> found = new ArrayList<>();
    final String targetTag = elementName.toLowerCase(Locale.ROOT);

    for (final HtmlElement descendant : getFormHtmlElementDescendants()) {
        final boolean tagMatches = descendant.getTagName().equals(targetTag);
        if (tagMatches) {
            final String value = descendant.getAttribute(attributeName);
            final boolean valueMatches = value != null && value.equals(attributeValue);
            if (valueMatches) {
                found.add((E) descendant);
            }
        }
    }
    return found;
}
/**
 * Collects every descendant element whose tag name and attribute value both match the given criteria.
 *
 * @param elementName the name of the element to search for (lower-cased before comparison)
 * @param attributeName the name of the attribute to search for
 * @param attributeValue the value of the attribute to search for
 * @param <E> the sub-element type
 * @return all elements which are descendants of this element and match the specified search criteria
 */
@SuppressWarnings("unchecked")
public final <E extends HtmlElement> List<E> getElementsByAttribute(
        final String elementName,
        final String attributeName,
        final String attributeValue) {

    final List<E> matches = new ArrayList<>();
    final String wantedTag = elementName.toLowerCase(Locale.ROOT);

    for (final HtmlElement candidate : getHtmlElementDescendants()) {
        // Only elements with the requested tag name are considered.
        if (!candidate.getTagName().equals(wantedTag)) {
            continue;
        }
        final String actualValue = candidate.getAttribute(attributeName);
        if (actualValue == null || !actualValue.equals(attributeValue)) {
            continue;
        }
        matches.add((E) candidate);
    }
    return matches;
}
/**
 * Returns all elements which are descendants of this element and match the specified search criteria.
 *
 * <p>The element name is lower-cased before it is compared with each descendant's tag name;
 * the attribute value must be equal to {@code attributeValue}.
 *
 * @param elementName the name of the element to search for; must not be {@code null}
 * @param attributeName the name of the attribute to search for
 * @param attributeValue the value of the attribute to search for
 * @param <E> the sub-element type
 * @return all elements which are descendants of this element and match the specified search criteria;
 *         an empty list if none match
 */
@SuppressWarnings("unchecked")
public final <E extends HtmlElement> List<E> getElementsByAttribute(
        final String elementName,
        final String attributeName,
        final String attributeValue) {

    final List<E> list = new ArrayList<E>();
    // Lower-case with a fixed locale; the default-locale variant turns "INPUT" into
    // "ınput" on a Turkish system, and the search would then find nothing.
    final String lowerCaseTagName = elementName.toLowerCase(java.util.Locale.ROOT);

    for (final HtmlElement next : getAllHtmlChildElements()) {
        if (next.getTagName().equals(lowerCaseTagName)) {
            final String attValue = next.getAttribute(attributeName);
            if (attValue != null && attValue.equals(attributeValue)) {
                list.add((E) next);
            }
        }
    }
    return list;
}
/**
 * Returns all elements which are descendants of this element and match the specified search criteria.
 *
 * <p>The element name is lower-cased before it is compared with each descendant's tag name;
 * the attribute value must be equal to {@code attributeValue}.
 *
 * @param elementName the name of the element to search for; must not be {@code null}
 * @param attributeName the name of the attribute to search for
 * @param attributeValue the value of the attribute to search for
 * @param <E> the sub-element type
 * @return all elements which are descendants of this element and match the specified search criteria;
 *         an empty list if none match
 */
@SuppressWarnings("unchecked")
public final <E extends HtmlElement> List<E> getElementsByAttribute(
        final String elementName,
        final String attributeName,
        final String attributeValue) {

    final List<E> list = new ArrayList<E>();
    // The tag-name comparison must not vary with the JVM's default locale
    // (Turkish lower-casing maps "I" to a dotless "ı").
    final String lowerCaseTagName = elementName.toLowerCase(java.util.Locale.ROOT);

    for (final HtmlElement next : getAllHtmlChildElements()) {
        if (next.getTagName().equals(lowerCaseTagName)) {
            final String attValue = next.getAttribute(attributeName);
            if (attValue != null && attValue.equals(attributeValue)) {
                list.add((E) next);
            }
        }
    }
    return list;
}
/**
 * Refuses the assignment: this override never replaces the node and instead
 * raises a runtime error mentioning the node's tag name.
 * @param value the new value for replacing this node (not used)
 */
@Override
public void setOuterHTML(final Object value) {
    final StringBuilder message = new StringBuilder("outerHTML is read-only for tag '");
    message.append(getDomNodeOrDie().getTagName());
    message.append("'");
    throw Context.reportRuntimeError(message.toString());
}
/** * Returns {@code true} if the specified element gets submitted when this form is submitted, * assuming that the form is submitted using the specified submit element. * * @param element the element to check * @param submitElement the element used to submit the form, or {@code null} if the form is * submitted by JavaScript * @return {@code true} if the specified element gets submitted when this form is submitted */ private static boolean isSubmittable(final HtmlElement element, final SubmittableElement submitElement) { final String tagName = element.getTagName(); if (!isValidForSubmission(element, submitElement)) { return false; } // The one submit button that was clicked can be submitted but no other ones if (element == submitElement) { return true; } if (element instanceof HtmlInput) { final HtmlInput input = (HtmlInput) element; final String type = input.getTypeAttribute().toLowerCase(Locale.ROOT); if ("submit".equals(type) || "image".equals(type) || "reset".equals(type) || "button".equals(type)) { return false; } } if (HtmlButton.TAG_NAME.equals(tagName)) { return false; } return true; }
/**
 * Reports the checked state of an input or the selected state of an option.
 *
 * @return the checked/selected state of the wrapped element
 * @throws UnsupportedOperationException if the element is neither an {@code HtmlInput}
 *         nor an {@code HtmlOption}
 */
public boolean isSelected() {
    assertElementNotStale();

    if (element instanceof HtmlInput) {
        final HtmlInput input = (HtmlInput) element;
        return input.isChecked();
    }
    if (element instanceof HtmlOption) {
        final HtmlOption option = (HtmlOption) element;
        return option.isSelected();
    }

    final String tagName = element.getTagName();
    throw new UnsupportedOperationException("Unable to determine if element is selected. Tag name is: " + tagName);
}
/**
 * Checks an input or selects an option.
 *
 * @throws UnsupportedOperationException if the element carries a {@code disabled} attribute,
 *         or if it is neither an {@code HtmlInput} nor an {@code HtmlOption}
 */
public void setSelected() {
    assertElementNotStale();

    // "disabled" is a boolean attribute: its mere presence disables the element, even
    // when written without a value (<input disabled>). Testing the value's length
    // would treat such an element as enabled.
    if (element.hasAttribute("disabled")) {
        throw new UnsupportedOperationException("You may not select a disabled element");
    }

    if (element instanceof HtmlInput) {
        ((HtmlInput) element).setChecked(true);
    } else if (element instanceof HtmlOption) {
        ((HtmlOption) element).setSelected(true);
    } else {
        throw new UnsupportedOperationException("Unable to select element. Tag name is: " + element.getTagName());
    }
}
/**
 * Small demo program: loads http://www.google.com, prints the page title and then, for
 * every {@code img} element on the page, its tag name, width, height, text and XML form.
 */
public class HtmlUnitTest {

    /**
     * Runs the demo.
     *
     * @param args command-line arguments (not used)
     * @throws FailingHttpStatusCodeException declared for the page fetch
     * @throws MalformedURLException declared for the page fetch
     * @throws IOException declared for the page fetch
     */
    public static void main(String[] args)
            throws FailingHttpStatusCodeException, MalformedURLException, IOException {
        final WebClient webClient = new WebClient();
        final HtmlPage page = webClient.getPage("http://www.google.com");
        System.out.println(page.getTitleText());

        for (HtmlElement node : page.getHtmlElementDescendants()) {
            // equalsIgnoreCase is locale-independent; toUpperCase() with the default locale
            // turns "img" into "İMG" on a Turkish system and would skip every image.
            if ("IMG".equalsIgnoreCase(node.getTagName())) {
                System.out.println("NAME: " + node.getTagName());
                System.out.println("WIDTH:" + node.getAttribute("width"));
                System.out.println("HEIGHT:" + node.getAttribute("height"));
                System.out.println("TEXT: " + node.asText());
                System.out.println("XMl: " + node.asXml());
            }
        }
    }
}
private static boolean isValidForSubmission(final HtmlElement element, final SubmittableElement submitElement) { final String tagName = element.getTagName(); if (!SUBMITTABLE_ELEMENT_NAMES.contains(tagName)) { return false; } if (element.hasAttribute("disabled")) { return false; } // clicked input type="image" is submitted even if it hasn't a name if (element == submitElement && element instanceof HtmlImageInput) { return true; } if (!HtmlIsIndex.TAG_NAME.equals(tagName) && !element.hasAttribute("name")) { return false; } if (!HtmlIsIndex.TAG_NAME.equals(tagName) && "".equals(element.getAttributeDirect("name"))) { return false; } if (element instanceof HtmlInput) { final String type = element.getAttributeDirect("type").toLowerCase(Locale.ROOT); if ("radio".equals(type) || "checkbox".equals(type)) { return ((HtmlInput) element).isChecked(); } } if (HtmlSelect.TAG_NAME.equals(tagName)) { return ((HtmlSelect) element).isValidForSubmission(); } return true; }
wrs.setAdditionalHeader("Referer", page.getWebResponse().getRequestSettings().getUrl().toString()); client.addRequestHeader("Accept", acceptTypes.get(el.getTagName().toLowerCase())); client.getPage(wrs); } catch (Exception e) {}