/**
 * Tells whether an attribute is the one that carries the character set in a
 * <code>meta</code> tag declaring <code>http-equiv="Content-Type"</code>.
 * <p>
 * All of the following must hold: the tag is named <code>meta</code> (any case), the
 * attribute name matches {@link HtmlEncoder#CONTENT} (any case), the tag has both an
 * <code>http-equiv</code> and a <code>content</code> attribute value, the
 * <code>http-equiv</code> value is <code>Content-Type</code> (any case), and the
 * lower-cased <code>content</code> value contains <code>charset=</code>.
 *
 * @param attrName name of the attribute being examined
 * @param attrValue value of the attribute; not consulted, the value is read from the tag
 * @param tag tag that holds the attribute; cast to {@link StartTag} once the name checks pass
 * @return <code>true</code> if the attribute is the charset-bearing content attribute,
 *         <code>false</code> otherwise
 */
private boolean isMetaCharset(String attrName, String attrValue, Tag tag) {
    boolean contentAttributeOfMeta = tag.getName().equalsIgnoreCase("meta")
            && attrName.equalsIgnoreCase(HtmlEncoder.CONTENT);
    if (!contentAttributeOfMeta) {
        return false;
    }

    StartTag metaTag = (StartTag) tag;

    String httpEquiv = metaTag.getAttributeValue("http-equiv");
    if (httpEquiv == null) {
        return false;
    }

    String content = metaTag.getAttributeValue("content");
    if (content == null) {
        return false;
    }

    return httpEquiv.equalsIgnoreCase("Content-Type")
            && content.toLowerCase().contains("charset=");
}
/**
 * Looks up an attribute by name (case insensitive) and returns its
 * {@linkplain CharacterReference#decode(CharSequence) decoded} value.
 * <p>
 * The call is forwarded unchanged to
 * {@link #getStartTag()}<code>.</code>{@link StartTag#getAttributeValue(String) getAttributeValue(attributeName)}.
 * <p>
 * The result is <code>null</code> in each of these cases:
 * <ul>
 *  <li>the {@linkplain #getStartTag() start tag of this element} does not
 *      {@linkplain StartTagType#hasAttributes() have attributes};
 *  <li>no attribute with the specified name exists;
 *  <li>the attribute {@linkplain Attribute#hasValue() has no value}.
 * </ul>
 *
 * @param attributeName the name of the attribute whose value is wanted.
 * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} attribute value, or <code>null</code> if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}.
 */
public String getAttributeValue(final String attributeName) {
    return this.getStartTag().getAttributeValue(attributeName);
}
final String href=startTag.getAttributeValue("href"); if (href==null || href.startsWith("javascript:")) return null; try {
/** * Normalizes naming of attributes whose values are the * encoding or a language name, so that they can be * automatically changed to the output encoding and output. * Unfortunately, this hard codes the tags to look for. * @param attrName name of the attribute * @param attrValue, value of the attribute * @param tag tag that contains the attribute * @return a normalized name for the attribute */ @Override protected String normalizeAttributeName(String attrName, String attrValue, Tag tag) { // normalize values for HTML String normalizedName = attrName; String tagName; // DWH 2-19-09 */ // <w:lang w:val="en-US" ...> tagName = tag.getName(); if (tagName.equals("w:lang") || tagName.equals("w:themefontlang")) // DWH 4-3-09 themeFontLang { StartTag st = (StartTag) tag; if (st.getAttributeValue("w:val") != null) { normalizedName = Property.LANGUAGE; return normalizedName; } } return normalizedName; }
final String alt=startTag.getAttributeValue("alt"); if (alt==null || alt.length()==0) return null; return '['+alt+']';
&& attrName.equalsIgnoreCase(HtmlEncoder.CONTENT)) { StartTag st = (StartTag) tag; if (st.getAttributeValue("http-equiv") != null) { if (st.getAttributeValue("http-equiv").equalsIgnoreCase( "Content-Language")) { normalizedName = Property.LANGUAGE;
/**
 * Determines the character encoding that the document itself declares.
 * <p>
 * Candidates are examined in this order, and the first usable one wins:
 * <ol>
 *  <li>the <code>encoding</code> attribute of an XML declaration, considered only when the
 *      XML declaration is the tag returned by <code>getTagAt(0)</code>;
 *  <li>for each <code>meta</code> start tag in turn, its <code>charset</code> attribute, or
 *      failing that the value obtained from its <code>content</code> attribute when its
 *      <code>http-equiv</code> attribute is <code>content-type</code> (any case).
 * </ol>
 * Once the <code>documentSpecifiedEncoding</code> field is no longer the
 * <code>UNINITIALISED</code> sentinel, it is returned directly without re-scanning.
 * Every other exit returns the result of <code>setEncoding</code>.
 *
 * @param encodingDetector optional detector used to veto a <code>meta http-equiv</code>
 *        candidate; may be <code>null</code>, in which case no veto is applied
 * @return the value returned by <code>setEncoding</code> for the chosen candidate, or for
 *         <code>null</code> when no candidate was found
 */
final String getDocumentSpecifiedEncoding(EncodingDetector encodingDetector) {
	// Identity comparison against the sentinel: anything else means the lookup already ran.
	if (documentSpecifiedEncoding!=UNINITIALISED) return documentSpecifiedEncoding;
	final Tag xmlDeclarationTag=getTagAt(0);
	if (xmlDeclarationTag!=null && xmlDeclarationTag.getTagType()==StartTagType.XML_DECLARATION) {
		// The field doubles as scratch storage for the candidate being examined.
		documentSpecifiedEncoding=((StartTag)xmlDeclarationTag).getAttributeValue("encoding");
		if (documentSpecifiedEncoding!=null) return setEncoding(documentSpecifiedEncoding,xmlDeclarationTag.toString());
	}
	// No encoding from an XML declaration: fall back to the meta tags, in iteration order.
	for (StartTag metaTag : getAllStartTags(HTMLElementName.META)) {
		// A charset attribute is accepted as is; the detector is not consulted for it.
		documentSpecifiedEncoding=metaTag.getAttributeValue("charset");
		if (documentSpecifiedEncoding==null) {
			// Literal-first comparison, so a missing http-equiv attribute simply skips this tag.
			if (!"content-type".equalsIgnoreCase(metaTag.getAttributeValue("http-equiv"))) continue;
			final String contentValue=metaTag.getAttributeValue("content");
			if (contentValue==null) continue;
			// NOTE(review): presumably extracts the charset parameter from the header-style value
			// and may yield null when there is none - confirm against the helper.
			documentSpecifiedEncoding=getCharsetParameterFromHttpHeaderValue(contentValue);
			// A vetoed candidate stays in the field until the next assignment; every path that
			// leaves the loop from here ends in a setEncoding call.
			// NOTE(review): assumes setEncoding stores its first argument in the field - confirm.
			if (encodingDetector!=null && encodingDetector.isIncompatibleWithPreliminaryEncoding(documentSpecifiedEncoding)) continue;
		}
		if (documentSpecifiedEncoding!=null) return setEncoding(documentSpecifiedEncoding,metaTag.toString());
	}
	return setEncoding(null,"No encoding specified in document");
}
String value2 = st2.getAttributeValue(attribute1.getName()); if(value2 == null) { System.out.println("Attribute not present in stream2: attribute1=" + attribute1.getBegin() + "; tag2=" + st2.getBegin());
String name = attrs.get(i).getName(); if ( name.equals("xmlns") || name.startsWith("xmlns:") ) { String xmlns = st.getAttributeValue(name); if ( xmlns == null ) {
&& partName.equals("xl/workbook.xml") && sTagName.equals("sheet") && "hidden".equals(startTag.getAttributeValue("state"))) { addToDocumentPart(sTagString); break;