/**
 * Tests whether a tag is the opening ("if") half of a
 * {@linkplain #DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT downlevel-revealed conditional comment},
 * i.e. a tag of the form <code>&lt;![if<var> ... </var>]&gt;</code>.
 * <p>
 * The check compares the tag's name by reference against
 * <code>StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF</code>.
 *
 * @param tag  the {@link Tag} to test; must not be <code>null</code>.
 * @return <code>true</code> if the tag is a
 *         <a target="_blank" href="http://en.wikipedia.org/wiki/Conditional_comment">conditional comment</a>
 *         "if" tag, otherwise <code>false</code>.
 */
public static boolean isConditionalCommentIfTag(final Tag tag) {
    final String tagName = tag.getName();
    // Reference comparison is intentional here (assumes tag names are canonical instances -- confirm against Tag.getName()).
    return tagName == StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF;
}
/**
 * Tests whether a tag is the closing ("endif") half of a
 * {@linkplain #DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT downlevel-revealed conditional comment},
 * i.e. the tag <code>&lt;![endif]&gt;</code>.
 * <p>
 * The check compares the tag's name by reference against
 * <code>StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF</code>.
 *
 * @param tag  the {@link Tag} to test; must not be <code>null</code>.
 * @return <code>true</code> if the tag is a
 *         <a target="_blank" href="http://en.wikipedia.org/wiki/Conditional_comment">conditional comment</a>
 *         "endif" tag, otherwise <code>false</code>.
 */
public static boolean isConditionalCommentEndifTag(final Tag tag) {
    final String tagName = tag.getName();
    // Reference comparison is intentional here (assumes tag names are canonical instances -- confirm against Tag.getName()).
    return tagName == StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF;
}
protected Tag constructTagAt(final Source source, final int pos) { final Tag tag=super.constructTagAt(source,pos); if (tag==null) return null; final String name=tag.getName(); if (name!=ELEMENT && name!=ATTLIST && name!=ENTITY && name!=NOTATION) return null; // can use == instead of .equals() because the names are in HtmlElements.CONSTANT_NAME_MAP return tag; }
protected Tag constructTagAt(final Source source, final int pos) { final Tag tag=super.constructTagAt(source,pos); if (tag==null) return null; final String name=tag.getName(); if (name!=IF && name!=ENDIF) return null; // can use == instead of .equals() because the names are in HtmlElements.CONSTANT_NAME_MAP return tag; } }
/**
 * Resolves the type string for a tag.
 * <p>
 * Comments and XML processing instructions map to fixed {@code Code} constants.
 * For any other tag, the configuration rule registered under the lower-cased
 * tag name is consulted; if it defines {@code ELEMENT_TYPE}, that value is
 * returned. Otherwise the tag's own name is returned unchanged.
 *
 * @param element the tag whose type is wanted; must not be {@code null}.
 * @return the comment / processing-instruction type constant, the configured
 *         element type, or the tag name as a fallback.
 */
public String getElementType(Tag element) {
    if (element.getTagType() == StartTagType.COMMENT) {
        return Code.TYPE_COMMENT;
    }

    if (element.getTagType() == StartTagType.XML_PROCESSING_INSTRUCTION) {
        return Code.TYPE_XML_PROCESSING_INSTRUCTION;
    }

    // Lower-case with a fixed locale: the default-locale overload can turn 'I'
    // into a dotless 'ı' (e.g. under tr_TR), which would miss the rule lookup.
    String ruleKey = element.getName().toLowerCase(java.util.Locale.ROOT);
    Map<String, Object> rule = configReader.getElementRule(ruleKey);
    if (rule != null && rule.containsKey(ELEMENT_TYPE)) {
        return (String) rule.get(ELEMENT_TYPE);
    }

    return element.getName();
}
/** * Normalizes naming of attributes whose values are the * encoding or a language name, so that they can be * automatically changed to the output encoding and output. * Unfortunately, this hard codes the tags to look for. * @param attrName name of the attribute * @param attrValue, value of the attribute * @param tag tag that contains the attribute * @return a normalized name for the attribute */ @Override protected String normalizeAttributeName(String attrName, String attrValue, Tag tag) { // normalize values for HTML String normalizedName = attrName; String tagName; // DWH 2-19-09 */ // <w:lang w:val="en-US" ...> tagName = tag.getName(); if (tagName.equals("w:lang") || tagName.equals("w:themefontlang")) // DWH 4-3-09 themeFontLang { StartTag st = (StartTag) tag; if (st.getAttributeValue("w:val") != null) { normalizedName = Property.LANGUAGE; return normalizedName; } } return normalizedName; }
/**
 * Reports whether the given attribute is the <code>content</code> attribute of
 * a <code>meta</code> tag that declares a character set, i.e. the tag has
 * <code>http-equiv="Content-Type"</code> (case-insensitive) and a
 * <code>content</code> value containing <code>charset=</code>.
 *
 * @param attrName  name of the attribute being examined
 * @param attrValue value of the attribute (not consulted; the value is re-read from the tag)
 * @param tag       tag that contains the attribute
 * @return {@code true} only when all of the conditions above hold
 */
private boolean isMetaCharset(String attrName, String attrValue, Tag tag) {
    final boolean metaContentAttribute = tag.getName().equalsIgnoreCase("meta")
            && attrName.equalsIgnoreCase(HtmlEncoder.CONTENT);
    if (!metaContentAttribute) {
        return false;
    }

    final StartTag metaTag = (StartTag) tag;
    final String httpEquiv = metaTag.getAttributeValue("http-equiv");
    final String content = metaTag.getAttributeValue("content");
    if (httpEquiv == null || content == null) {
        return false;
    }

    return httpEquiv.equalsIgnoreCase("Content-Type")
            && content.toLowerCase().contains("charset=");
}
if (quickCutTags.contains(tag.getName())) { String cutHtml = source.substring(0, tag.getBegin()); return cutHtml + getClosingTagsBehindElement(tag.getElement()) + SHORTED_FORM_END;
if(!(tag1.getName().equals(tag2.getName()))) { System.out.println("Tag name mismatch: tag1=" + tag1.getBegin() + "; tag2=" + tag2.getBegin()); System.out.println("Expected: " + tag1.getName() + "; Actual: " + tag2.getName()); return false; if(tags2.get(index2 + 1).getName().equals(tag1.getName())) { index2++; if(tags1.get(index1 + 1).getName().equals(tag2.getName())) { index1++;
if (segment instanceof Tag) { Tag tag = (Tag)segment; if (getConfig().getElementRuleTypeCandidate(tag.getName()) == RULE_TYPE.INLINE_ELEMENT || getConfig().getElementRuleTypeCandidate(tag.getName()) == RULE_TYPE.INLINE_EXCLUDED_ELEMENT || (getEventBuilder().isInsideTextRun() && (tag .getTagType() == StartTagType.COMMENT || tag
protected Tag constructTagAt(final Source source, final int pos) { final Tag tag=super.constructTagAt(source,pos); if (tag==null) return null; // A mason named block does not have a '%' before its closing '>' delimiter and requires a matching end tag. if (source.charAt(tag.getEnd()-2)=='%') return null; // this is a common server tag, not a named block if (source.getNextEndTag(tag.getEnd(),tag.getName(),getCorrespondingEndTagType())==null) return null; return tag; } }
@Override protected void preProcess(Segment segment) { super.preProcess(segment); // let the handlers deal with wellformed content if (getConfig().isWellformed()) { return; } // otherwise we can't assume a valid end tag and we must close any // TextUnits when we see a non inline tag if (segment instanceof Tag) { // We just hit a tag that could close the current TextUnit final Tag tag = (Tag) segment; boolean inlineTag = false; if (getConfig().getElementRuleTypeCandidate(tag.getName()) == RULE_TYPE.INLINE_ELEMENT || getConfig().getElementRuleTypeCandidate(tag.getName()) == RULE_TYPE.INLINE_EXCLUDED_ELEMENT || (getEventBuilder().isInsideTextRun() && (tag .getTagType() == StartTagType.COMMENT || tag .getTagType() == StartTagType.XML_PROCESSING_INSTRUCTION)) || (getConfig().isInlineCdata() && tag.getName().equals("![cdata["))) { inlineTag = true; } // if its an inline code let the handlers deal with it if (getEventBuilder().isCurrentTextUnit() && !inlineTag) { getEventBuilder().endTextUnit(); } } }
/**
 * Filters an HTML string tag by tag.
 * <p>
 * Every tag found by a full sequential parse is passed to {@code processTag}.
 * A tag that is rejected is removed from the output when its name is in
 * {@code removedTags} or {@code allowedTags}; any other rejected tag is
 * replaced by its HTML-escaped text. The text between tags, and after the last
 * tag, is passed through {@code reencodeTextSegment}.
 *
 * @param source     the HTML to filter
 * @param properties passed on to {@code correctNewLineSigns}
 * @return the filtered document after new-line correction
 */
@Override
public String filter(String source, Map<String, Object> properties) {
    Source sourceHtml = new Source(source);
    sourceHtml.setLogger(null); // presumably silences parser logging -- confirm against Source.setLogger
    sourceHtml.fullSequentialParse();

    OutputDocument outputDocument = new OutputDocument(sourceHtml);
    List<Tag> tags = sourceHtml.getAllTags();

    int pos = 0; // end of the previously handled tag, i.e. start of the pending text segment
    for (Tag tag : tags) {
        boolean correctAndAllowedTag = processTag(tag, outputDocument);
        if (!correctAndAllowedTag) {
            // Fixed locale: default-locale lower-casing (e.g. tr_TR maps 'I' to 'ı')
            // must not influence matching against the tag lists.
            String elementName = tag.getName().toLowerCase(java.util.Locale.ROOT);
            if (removedTags.contains(elementName) || allowedTags.contains(elementName)) {
                outputDocument.remove(tag);
            } else {
                outputDocument.replace(tag, StringEscapeUtils.escapeHtml(tag.toString()));
            }
        }
        reencodeTextSegment(sourceHtml, outputDocument, pos, tag.getBegin());
        pos = tag.getEnd();
    }
    // Trailing text after the last tag.
    reencodeTextSegment(sourceHtml, outputDocument, pos, sourceHtml.getEnd());

    return correctNewLineSigns(outputDocument.toString(), properties);
}
/**
 * Validates a single tag and, when it is acceptable, rewrites it in the output.
 * <p>
 * A tag is rejected when its name is not in {@code allowedTags}, when it is a
 * normal start tag of an element that requires an end tag but has none, when
 * it is a normal end tag with no element, or when it is neither a normal start
 * tag nor a normal end tag. For elements whose end tag is optional and missing,
 * an end tag is inserted at the end of the element.
 *
 * @param tag            the tag to examine
 * @param outputDocument the document receiving replacements and insertions
 * @return {@code true} if the tag was accepted and rewritten, {@code false} otherwise
 */
private boolean processTag(Tag tag, OutputDocument outputDocument) {
    // Fixed locale: default-locale lower-casing (e.g. tr_TR maps 'I' to 'ı')
    // must not influence matching against the allow-list.
    String elementName = tag.getName().toLowerCase(java.util.Locale.ROOT);
    if (!allowedTags.contains(elementName)) {
        return false;
    }

    if (tag.getTagType() == StartTagType.NORMAL) {
        Element element = tag.getElement();
        if (HTMLElements.getEndTagRequiredElementNames().contains(elementName)) {
            // A required end tag is missing: reject the start tag.
            if (element.getEndTag() == null) {
                return false;
            }
        } else if (HTMLElements.getEndTagOptionalElementNames().contains(elementName)
                && element.getEndTag() == null) {
            // Optional end tag omitted: add it explicitly.
            outputDocument.insert(element.getEnd(), getEndTagHTML(elementName));
        }
        outputDocument.replace(tag, getStartTagHTML(element.getStartTag()));
        return true;
    }

    if (tag.getTagType() == EndTagType.NORMAL) {
        // An end tag without an element is rejected.
        if (tag.getElement() == null) {
            return false;
        }
        outputDocument.replace(tag, getEndTagHTML(elementName));
        return true;
    }

    return false;
}
handleServerCommonEscaped(tag); else if ( tag.getName().startsWith("%--") ) {
public ParserTag(net.htmlparser.jericho.Tag tag) { setName(tag.getName()); setBegin(tag.getElement().getEnd()); setEnd(tag.getElement().getBegin()); setStartTagBegin(tag.getElement().getStartTag().getBegin()); setStartTagEnd(tag.getElement().getStartTag().getEnd()); if (tag.getElement().getEndTag() != null) { setEndTagBegin(tag.getElement().getEndTag().getBegin()); setEndTagEnd(tag.getElement().getEndTag().getEnd()); } else { setEndTagBegin(tag.getElement().getStartTag().getBegin()); setEndTagEnd(tag.getElement().getStartTag().getEnd()); } setAttributes(tag.getElement().getAttributes()); }