/**
 * Verify that {@code source} contains {@code expected} starting at {@code pos}.
 *
 * @param source the text to inspect
 * @param pos offset at which {@code expected} has to begin
 * @param expected the exact text required at {@code pos}
 * @throws XMLParseException if the text found at {@code pos} is not equal to
 *         {@code expected}; the message quotes both the expected and the found text
 */
private void expect (XMLSource source, int pos, String expected) {
    // The end of the compared window is clamped to the length of the source.
    int limit = Math.min (pos + expected.length (), source.length ());
    String actual = source.substring (pos, limit);
    if (expected.equals (actual)) {
        return;
    }
    throw new XMLParseException ("Expected '"+expected+"' but found '"+actual+"'", source, pos);
}
/**
 * Check that the source contains {@code expected} beginning at {@code startPos}
 * and, on success, move the current position to the first character after it.
 *
 * @param expected the text that must be present
 * @param startPos offset in the source where {@code expected} has to begin
 * @param errorMessage message of the exception thrown on a mismatch
 * @throws XMLParseException (located at {@code startPos}) if the source is too
 *         short or the text differs from {@code expected}
 */
protected void nextChars (String expected, int startPos, String errorMessage) {
    // Offset of the first character after the expected text.
    int end = startPos + expected.length ();

    boolean matches = end <= source.length ()
            && expected.equals (source.substring (startPos, end));
    if (!matches) {
        throw new XMLParseException (errorMessage, source, startPos);
    }

    pos = end;
}
/** * This moves the line and column information by the text found in the source. */ protected void moveToOffset (XMLSource source, int offset) { offset = Math.min (source.length (), offset); for (int i=0; i<offset; i++) { char c = source.charAt (i); //System.out.println (line+":"+column+" "+c+" ("+((int)c)+")"); if (c == '\r' || c == '\n') { line ++; column = 1; if (c == '\r' && i+1 < source.length () && source.charAt (i+1) == '\n') i ++; lineStartOffset = i + 1; } else if (c == '\t') column += (8 - (column % 8)) + 1; else column ++; } }
/**
 * Build a short, escaped excerpt of the source for use in an error message.
 *
 * <p>Up to {@code len} characters starting at {@code pos} are taken; if more
 * input follows the excerpt, "..." is appended before escaping.
 *
 * @param conditionalPrefix text placed (surrounded by single spaces) in front
 *        of the excerpt, or {@code null} to return the excerpt alone
 * @param pos offset in the source where the excerpt starts
 * @param len maximum number of source characters in the excerpt
 * @return the excerpt, or the empty string if {@code pos} is at or beyond the
 *         end of the source
 */
protected String lookAheadForErrorMessage (String conditionalPrefix, int pos, int len) {
    if (pos >= source.length ()) {
        return "";
    }

    int remaining = source.length () - pos;
    int count = Math.min (len, remaining);

    String excerpt = source.substring (pos, pos + count);
    if (count != remaining) {
        // The excerpt was cut short; show that more text follows.
        excerpt += "...";
    }

    String escaped = TextUtils.escapeJavaString (excerpt);
    return conditionalPrefix == null
            ? escaped
            : " " + conditionalPrefix + " " + escaped;
}
/**
 * Return the character at the current position and advance past it.
 *
 * @param errorMessage message of the exception thrown at the end of the input
 * @return the character that was at the current position
 * @throws XMLParseException if the current position is at or beyond the end
 *         of the source
 */
protected char nextChar (String errorMessage) {
    if (pos < source.length ()) {
        int current = pos;
        pos = current + 1;
        return source.charAt (current);
    }

    throw new XMLParseException (errorMessage, source, pos);
}
/**
 * Move the current position forward to the first character that the
 * character validator does not classify as whitespace, or to the end of the
 * input if only whitespace remains.
 */
protected void skipWhiteSpace () {
    for (; pos < source.length (); pos ++) {
        if (!charValidator.isWhitespace (source.charAt (pos))) {
            break;
        }
    }
}
/**
 * Verify the character at {@code offset} in {@code source}.
 *
 * <p>If the character is a high surrogate, it is combined with the following
 * low surrogate and the resulting code point is checked; otherwise the single
 * character is checked. The actual check is delegated to the one-argument
 * {@code isValid} overload.
 *
 * @param source the text to read from
 * @param offset position of the character to verify
 * @return an error message if a high surrogate is the last character of the
 *         source or is not followed by a low surrogate; otherwise whatever the
 *         one-argument {@code isValid} returns (presumably {@code null} for a
 *         valid character - confirm against that overload)
 */
public String isValid (XMLSource source, int offset) {
    char first = source.charAt (offset);
    if (!Character.isHighSurrogate (first)) {
        return isValid (first);
    }

    int next = offset + 1;
    if (next >= source.length ()) {
        return "Unexpected end of input";
    }

    char second = source.charAt (next);
    if (!Character.isLowSurrogate (second)) {
        return "Character after first in surrogate pair is not between 0xDC00 and 0xDFFF: "+Integer.toHexString (second);
    }

    return isValid (Character.toCodePoint (first, second));
}
/**
 * Require {@code expected} at the current position and step over it.
 *
 * @param expected the character that has to come next
 * @throws XMLParseException if the input has ended or the next character is
 *         different; the message includes an excerpt of what was found
 */
protected void expect (char expected) {
    boolean matches = pos < source.length () && source.charAt (pos) == expected;
    if (!matches) {
        throw new XMLParseException ("Expected '"+expected+"'"+lookAheadForErrorMessage ("but found", pos, 20), source, pos);
    }

    pos ++;
}
/**
 * Read an XML name: one name start character followed by any number of name
 * characters, as classified by the character validator. On return the current
 * position is just after the name.
 *
 * @param objectName description of what the name belongs to; only used in
 *        the error message
 * @throws XMLParseException if the input has ended or the current character
 *         is not a name start character
 */
protected void parseName (String objectName) {
    final int startPos = pos;

    boolean startsName = pos < source.length ()
            && charValidator.isNameStartChar (source.charAt (pos));
    if (!startsName) {
        throw new XMLParseException ("Expected valid XML name for "+objectName+lookAheadForErrorMessage ("but found", startPos, 20), source, startPos);
    }

    // Consume the start character, then every following name character.
    do {
        pos ++;
    } while (pos < source.length () && charValidator.isNameChar (source.charAt (pos)));
}
/**
 * Read the remainder of an entity reference up to and including the closing
 * ';' and mark {@code token} as {@code Type.ENTITY}.
 *
 * <p>An optional leading '#' is accepted; every further character before the
 * ';' has to be a name character according to the character validator. The
 * finished entity is passed to {@code verifyEntity} together with the token's
 * start offset and the position after the ';'.
 *
 * @param token the token being filled in
 * @throws XMLParseException on an illegal character or if the ';' is missing
 */
protected void parseEntity (Token token) {
    token.setType (Type.ENTITY);

    // Optional '#' as the first character.
    if (pos < source.length () && source.charAt (pos) == '#') {
        pos ++;
    }

    boolean terminatorSeen = false;
    while (!terminatorSeen && pos < source.length ()) {
        char c = source.charAt (pos);
        if (c == ';') {
            terminatorSeen = true;
        } else if (charValidator.isNameChar (c)) {
            pos ++;
        } else {
            throw new XMLParseException ("Illegal character in entity: ["+c+"] ("+Integer.toHexString (c)+")", source, pos);
        }
    }

    // Consumes the ';' or reports that the input ended without one.
    expect (';');
    verifyEntity (token.getStartOffset (), pos);
}
/**
 * Read a "--"-terminated comment and mark {@code token} as
 * {@code Type.DOCTYPE_COMMENT}.
 *
 * <p>The next character has to be '-' (presumably the second dash of the
 * opening "--", the first one having been consumed by the caller - confirm).
 * After that, input is consumed up to and including the next "--".
 *
 * <p>If the input ends before any further '-' is seen, the loop ends without
 * an error and the token type is still set.
 *
 * @param token the token being filled in
 * @throws XMLParseException if the next character is not '-', or if the
 *         input ends directly after a single '-'
 */
protected void parseDocTypeComment (Token token) {
    expect ('-');

    while (pos < source.length ()) {
        char c = source.charAt (pos ++);
        if (c == '-') {
            // nextChar() returns the character at the current position, so
            // the first '-' must already be consumed here. Previously it was
            // read a second time, which made every single '-' end the comment
            // (swallowing the character after it) and could move the position
            // past the end of the input.
            c = nextChar ("Expected '--'");
            if (c == '-') {
                break;
            }
        }
    }

    token.setType (Type.DOCTYPE_COMMENT);
}
if (pos >= source.length () || source.charAt (pos) != '-') throw new XMLParseException ("Expected '<!--'", source, pos-3); if (pos >= source.length ()) throw new XMLParseException ("Expected '-->'", source, pos); if (pos >= source.length ()) throw new XMLParseException ("Expected '-->'", source, pos-1); continue; if (pos >= source.length ()) throw new XMLParseException ("Expected '-->'", source, pos-2);
protected void parseDocTypeText (Token token) { token.setType (Type.TEXT); pos --; while (pos < source.length () && getCharValidator ().isNameChar (source.charAt (pos))) pos ++; String s = source.substring (token.getStartOffset (), pos); if (s.length () == 0) throw new XMLParseException ("Expected some text"+lookAheadForErrorMessage ("but found", token.getStartOffset (), 20), token); // TODO How about "<!DOCTYPE SYSTEM ..."? if ("SYSTEM".equals (s)) token.setType (Type.DOCTYPE_SYSTEM); else if ("PUBLIC".equals (s)) token.setType (Type.DOCTYPE_PUBLIC); else if ("NDATA".equals (s)) token.setType (Type.DOCTYPE_NDATA); }
/**
 * Read one of "<tag", "<?pi", "<!--", "<![CDATA[" or an end tag.
 *
 * <p>The character at the current position selects the parser: '?' goes to
 * {@code parseProcessingInstruction}, '!' to {@code parseExcalamation} and '/'
 * to {@code parseEndElement}, each after that character has been skipped.
 * Anything else is passed to {@code parseBeginElement} without skipping.
 *
 * @param token the token being filled in
 * @throws XMLParseException if the input has already ended
 */
protected void parseBeginSomething (Token token) {
    if (pos >= source.length ()) {
        throw new XMLParseException ("Unexpected end of input. Expected start or end tag, processing instruction, comment or CDATA", source, pos);
    }

    char marker = source.charAt (pos);
    if (marker == '?') {
        pos ++;
        parseProcessingInstruction (token);
    } else if (marker == '!') {
        pos ++;
        parseExcalamation (token);
    } else if (marker == '/') {
        pos ++;
        parseEndElement (token);
    } else {
        parseBeginElement (token);
    }
}
/**
 * Read the name of an element and mark {@code token} as
 * {@code Type.BEGIN_ELEMENT}.
 *
 * <p>The resulting token will contain the {@code '<'} plus any whitespace
 * between it and the name plus the name itself but no whitespace after the
 * name. {@code inStartElement} is set to {@code true} before parsing.
 *
 * @param token the token being filled in
 * @throws XMLParseException if the name is missing, the input ends after the
 *         name, or the name is followed by something other than whitespace,
 *         '/' or '>'
 */
protected void parseBeginElement (Token token) {
    token.setType (Type.BEGIN_ELEMENT);
    inStartElement = true;

    skipWhiteSpace ();

    final int nameStartOffset = pos;
    parseName ("start tag");
    if (nameStartOffset == pos) {
        throw new XMLParseException ("Missing element name", token);
    }

    if (pos >= source.length ()) {
        throw new XMLParseException ("Missing '>' of start tag", source, pos);
    }

    // Only whitespace, '/' or '>' may directly follow the element name.
    char follower = source.charAt (pos);
    boolean properlyTerminated = charValidator.isWhitespace (follower)
            || follower == '/'
            || follower == '>';
    if (!properlyTerminated) {
        throw new XMLParseException ("Expected whitespace, '>' or '/>' after element name", source, pos);
    }
}
/**
 * If {@code startToken} is DTD whitespace, record it in {@code docType} and
 * move on to the next token; otherwise keep {@code startToken}.
 *
 * @param tokenizer supplies the following token and the source used for
 *        error reporting
 * @param startToken the current token; this might be whitespace
 * @param docType receives the node created from a skipped whitespace token
 * @return {@code startToken} if it is not whitespace, otherwise the token
 *         that follows it
 * @throws XMLParseException if {@code startToken} is {@code null} or the
 *         tokenizer has no token after the whitespace
 */
protected Token skipOptionalWhitespace (XMLTokenizer tokenizer, Token startToken, DocType docType) {
    if (startToken == null) {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", tokenizer.getSource (), tokenizer.getSource ().length ());
    }

    if (startToken.getType () != Type.DTD_WHITESPACE) {
        return startToken;
    }

    docType.add (toNode (startToken));

    Token following = tokenizer.next ();
    if (following == null) {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", startToken);
    }
    return following;
}