/**
 * Verify the character at {@code offset} in {@code source}.
 *
 * <p>A high surrogate is combined with the low surrogate that follows it and
 * the resulting code point is handed to the code point check; any other
 * character is handed to the single character check directly.
 *
 * @param source the text to read from
 * @param offset index of the character (or of the first half of a surrogate pair) to verify
 * @return the result of the delegated {@code isValid} call, or a message
 *         describing a truncated or malformed surrogate pair
 */
public String isValid (XMLSource source, int offset)
{
    char first = source.charAt (offset);
    if (!Character.isHighSurrogate (first))
        return isValid (first);

    // A high surrogate must be followed by its low counterpart
    int nextOffset = offset + 1;
    if (nextOffset >= source.length ())
        return "Unexpected end of input";

    char second = source.charAt (nextOffset);
    if (!Character.isLowSurrogate (second))
        return "Character after first in surrogate pair is not between 0xDC00 and 0xDFFF: "+Integer.toHexString (second);

    return isValid (Character.toCodePoint (first, second));
}
/** * This moves the line and column information by the text found in the source. */ protected void moveToOffset (XMLSource source, int offset) { offset = Math.min (source.length (), offset); for (int i=0; i<offset; i++) { char c = source.charAt (i); //System.out.println (line+":"+column+" "+c+" ("+((int)c)+")"); if (c == '\r' || c == '\n') { line ++; column = 1; if (c == '\r' && i+1 < source.length () && source.charAt (i+1) == '\n') i ++; lineStartOffset = i + 1; } else if (c == '\t') column += (8 - (column % 8)) + 1; else column ++; } }
/**
 * Return the character at the current position and advance past it.
 *
 * @param errorMessage message for the exception raised when the input is exhausted
 * @return the character that was at {@code pos} before the call
 * @throws XMLParseException if {@code pos} is at or beyond the end of the source
 */
protected char nextChar (String errorMessage)
{
    if (pos < source.length ())
    {
        char current = source.charAt (pos);
        pos ++;
        return current;
    }

    throw new XMLParseException (errorMessage, source, pos);
}
/**
 * Move {@code pos} forward until it points at a character the
 * {@code charValidator} does not consider whitespace, or at the end of the input.
 */
protected void skipWhiteSpace ()
{
    while (true)
    {
        if (pos >= source.length ())
            return;
        if (!charValidator.isWhitespace (source.charAt (pos)))
            return;

        pos ++;
    }
}
/**
 * Consume the character at the current position, which must be {@code expected}.
 *
 * @param expected the character that has to be next in the input
 * @throws XMLParseException if the input is exhausted or the next character differs
 */
protected void expect (char expected)
{
    boolean matches = pos < source.length () && source.charAt (pos) == expected;
    if (matches)
    {
        pos ++;
        return;
    }

    throw new XMLParseException ("Expected '"+expected+"'"+lookAheadForErrorMessage ("but found", pos, 20), source, pos);
}
/**
 * Read an XML name starting at the current position.
 *
 * <p>On success {@code pos} points at the first character after the name.
 *
 * @param objectName description of what is being named; used in the error message
 * @throws XMLParseException if the input is exhausted or the current
 *         character is not a valid name start character
 */
protected void parseName (String objectName)
{
    int nameStart = pos;

    boolean startsName = nameStart < source.length ()
        && charValidator.isNameStartChar (source.charAt (nameStart));
    if (!startsName)
        throw new XMLParseException ("Expected valid XML name for "+objectName+lookAheadForErrorMessage ("but found", nameStart, 20), source, nameStart);

    // Consume the start character, then every name character that follows
    do
    {
        pos ++;
    }
    while (pos < source.length () && charValidator.isNameChar (source.charAt (pos)));
}
/**
 * Return the run of whitespace found at the beginning of the range
 * {@code [getStartOffset(), getEndOffset())} of the source.
 *
 * <p>Whitespace is determined with {@link Character#isWhitespace(char)}.
 *
 * @return the leading whitespace, or the empty string when the scan ends at offset 0
 */
public String getPrefixWhiteSpace ()
{
    int cursor = getStartOffset ();
    for (int end = getEndOffset (); cursor < end; cursor ++)
    {
        if (!Character.isWhitespace (source.charAt (cursor)))
            break;
    }

    if (cursor == 0)
        return "";

    return source.substring (getStartOffset (), cursor);
}
}
/**
 * Read the remainder of an entity reference up to and including the closing ';'.
 *
 * <p>An optional '#' is skipped first; after that only name characters are
 * accepted until the ';'. The finished range is passed to {@code verifyEntity}.
 *
 * @param token the token being built; its type is set to {@code Type.ENTITY}
 * @throws XMLParseException if a character that is not a name character
 *         appears before the ';' or the ';' is missing
 */
protected void parseEntity (Token token)
{
    token.setType (Type.ENTITY);

    // Optional marker of a numeric character reference
    if (pos < source.length () && source.charAt (pos) == '#')
        pos ++;

    for (; pos < source.length (); pos ++)
    {
        char current = source.charAt (pos);
        if (current == ';')
            break;

        if (!charValidator.isNameChar (current))
            throw new XMLParseException ("Illegal character in entity: ["+current+"] ("+Integer.toHexString (current)+")", source, pos);
    }

    expect (';');
    verifyEntity (token.getStartOffset (), pos);
}
/**
 * Read a comment inside a DOCTYPE declaration up to and including the
 * terminating {@code "--"}.
 *
 * <p>The method first consumes one '-' (the remainder of the opening
 * {@code "--"}) and then scans for two consecutive dashes. A single dash
 * followed by any other character is part of the comment text.
 *
 * <p>If the input ends without any further dash, the scan stops silently at
 * the end of the input, as before.
 *
 * @param token the token being built; its type is set to {@code Type.DOCTYPE_COMMENT}
 * @throws XMLParseException if the opening '-' is missing or the input ends
 *         directly after a single '-'
 */
protected void parseDocTypeComment (Token token)
{
    expect ('-');

    while (pos < source.length ())
    {
        // Consume the current character; only a dash can start the terminator
        if (source.charAt (pos ++) != '-')
            continue;

        // The first dash is consumed, so nextChar() now really looks at the
        // character after it instead of re-reading the same dash.
        if (nextChar ("Expected '--'") == '-')
            break;
    }

    token.setType (Type.DOCTYPE_COMMENT);
}
/**
 * Parse what follows {@code "<!"}: a comment ({@code "<!--"}), a CDATA
 * section ({@code "<![CDATA["}) or, when the next character is 'D', a
 * DOCTYPE declaration.
 *
 * <p>The deciding character is consumed before the specialised parser is called.
 *
 * @param token the token being built
 * @throws XMLParseException if the input ends here or the next character is
 *         none of '-', '[' or 'D'
 */
protected void parseExcalamation (Token token)
{
    // Guard against input that ends right after "<!" so that the caller gets
    // an XMLParseException instead of an index error from charAt()
    if (pos < source.length ())
    {
        switch (source.charAt (pos))
        {
        case '-':
            pos ++;
            parseComment (token);
            return;
        case '[':
            pos ++;
            parseCData (token);
            return;
        case 'D':
            pos ++;
            parseDocType (token);
            return;
        default:
            break;
        }
    }

    throw new XMLParseException ("Expected '<!--' or '<![CDATA['", source, pos-2);
}
if (pos >= source.length () || source.charAt (pos) != '-') throw new XMLParseException ("Expected '<!--'", source, pos-3); throw new XMLParseException ("Expected '-->'", source, pos); char c = source.charAt (pos); if (c == '-') throw new XMLParseException ("Expected '-->'", source, pos-1); c = source.charAt (pos ++); if (c != '-') continue; throw new XMLParseException ("Expected '-->'", source, pos-2); c = source.charAt (pos ++); if (c != '>') throw new XMLParseException ("XML comments must not contain '--'", source, pos-3);
throw new XMLParseException ("Expected ']]>'", source, pos); char c = source.charAt (pos); if (c == ']') throw new XMLParseException ("Expected ']]>'"+lookAheadForErrorMessage ("but found", errorPos, 20), source, errorPos); c = source.charAt (pos); if (c != ']') continue; c = source.charAt (pos + 1); if (c == '>')
protected void parseDocTypeText (Token token) { token.setType (Type.TEXT); pos --; while (pos < source.length () && getCharValidator ().isNameChar (source.charAt (pos))) pos ++; String s = source.substring (token.getStartOffset (), pos); if (s.length () == 0) throw new XMLParseException ("Expected some text"+lookAheadForErrorMessage ("but found", token.getStartOffset (), 20), token); // TODO How about "<!DOCTYPE SYSTEM ..."? if ("SYSTEM".equals (s)) token.setType (Type.DOCTYPE_SYSTEM); else if ("PUBLIC".equals (s)) token.setType (Type.DOCTYPE_PUBLIC); else if ("NDATA".equals (s)) token.setType (Type.DOCTYPE_NDATA); }
/**
 * Decide what kind of markup starts here and hand over to the matching parser.
 *
 * <p>'?' selects a processing instruction, '!' selects comment/CDATA/DOCTYPE
 * handling and '/' selects an end tag; in these cases the deciding character
 * is consumed first. Anything else is parsed as a start tag without
 * consuming a character.
 *
 * @param token the token being built
 * @throws XMLParseException if the input is exhausted
 */
protected void parseBeginSomething (Token token)
{
    if (pos >= source.length ())
        throw new XMLParseException ("Unexpected end of input. Expected start or end tag, processing instruction, comment or CDATA", source, pos);

    char marker = source.charAt (pos);
    if (marker == '?')
    {
        pos ++;
        parseProcessingInstruction (token);
    }
    else if (marker == '!')
    {
        pos ++;
        parseExcalamation (token);
    }
    else if (marker == '/')
    {
        pos ++;
        parseEndElement (token);
    }
    else
    {
        parseBeginElement (token);
    }
}
c = source.charAt (pos);
/**
 * Read the name of an element in a start tag.
 *
 * <p>Whitespace before the name is skipped, then the name is read. The
 * position stops directly after the name, which must be followed by
 * whitespace, '/' or '&gt;'. The flag {@code inStartElement} is set.
 *
 * @param token the token being built; its type is set to {@code Type.BEGIN_ELEMENT}
 * @throws XMLParseException if the name is missing, the input ends after the
 *         name or the name is followed by an unexpected character
 */
protected void parseBeginElement (Token token)
{
    token.setType (Type.BEGIN_ELEMENT);
    inStartElement = true;

    skipWhiteSpace ();

    int beforeName = pos;
    parseName ("start tag");

    boolean nameMissing = pos == beforeName;
    if (nameMissing)
        throw new XMLParseException ("Missing element name", token);

    if (pos >= source.length ())
        throw new XMLParseException ("Missing '>' of start tag", source, pos);

    char following = source.charAt (pos);
    boolean endsName = charValidator.isWhitespace (following) || following == '/' || following == '>';
    if (!endsName)
        throw new XMLParseException ("Expected whitespace, '>' or '/>' after element name", source, pos);
}
throw new XMLParseException ("Unexpected token "+token+" while parsing attributes of element "+parent.getName (), token); //@COBEX if (!Character.isWhitespace (token.getSource ().charAt (token.getStartOffset ()))) throw new XMLParseException ("Expected whitespace between attributes of element a but found "+token, token);