/** * Optional factory method, expected to be overridden * * @since 2.8 */ protected JsonParser _createParser(DataInput input, IOContext ctxt) throws IOException { // 13-May-2016, tatu: Need to take care not to accidentally create JSON parser for // non-JSON input. _requireJSONFactory("InputData source not (yet?) support for this format (%s)"); // Also: while we can't do full bootstrapping (due to read-ahead limitations), should // at least handle possible UTF-8 BOM int firstByte = ByteSourceJsonBootstrapper.skipUTF8BOM(input); ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChild(_factoryFeatures); return new UTF8DataInputJsonParser(ctxt, _parserFeatures, input, _objectCodec, can, firstByte); }
protected final void _matchToken(String matchStr, int i) throws IOException { final int len = matchStr.length(); do { int ch = _inputData.readUnsignedByte(); if (ch != matchStr.charAt(i)) { _reportInvalidToken(ch, matchStr.substring(0, i)); } } while (++i < len); int ch = _inputData.readUnsignedByte(); if (ch >= '0' && ch != ']' && ch != '}') { // expected/allowed chars _checkMatchEnd(matchStr, i, ch); } _nextByte = ch; }
@Override public byte[] getBinaryValue(Base64Variant b64variant) throws IOException { if (_currToken != JsonToken.VALUE_STRING && (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT || _binaryValue == null)) { _reportError("Current token ("+_currToken+") not VALUE_STRING or VALUE_EMBEDDED_OBJECT, can not access as binary"); } /* To ensure that we won't see inconsistent data, better clear up * state... */ if (_tokenIncomplete) { try { _binaryValue = _decodeBase64(b64variant); } catch (IllegalArgumentException iae) { throw _constructError("Failed to decode VALUE_STRING as base64 ("+b64variant+"): "+iae.getMessage()); } /* let's clear incomplete only now; allows for accessing other * textual content in error cases */ _tokenIncomplete = false; } else { // may actually require conversion... if (_binaryValue == null) { @SuppressWarnings("resource") ByteArrayBuilder builder = _getByteArrayBuilder(); _decodeBase64(getText(), builder, b64variant); _binaryValue = builder.toByteArray(); } } return _binaryValue; }
private final void _checkMatchEnd(String matchStr, int i, int ch) throws IOException { // but actually only alphanums are problematic char c = (char) _decodeCharForError(ch); if (Character.isJavaIdentifierPart(c)) { _reportInvalidToken(c, matchStr.substring(0, i)); } }
c = _decodeEscaped(); break; case 2: // 2-byte UTF c = _decodeUtf8_2(c); break; case 3: // 3-byte UTF c = _decodeUtf8_3(c); break; case 4: // 4-byte UTF c = _decodeUtf8_4(c); default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); } else { _reportInvalidChar(c);
_throwUnquotedSpace(ch, "name"); } else { ch = _decodeEscaped(); _quadBuffer = quads = _growArrayBy(quads, quads.length); _quadBuffer = quads = _growArrayBy(quads, quads.length); } else { if (qlen >= quads.length) { _quadBuffer = quads = _growArrayBy(quads, quads.length); _quadBuffer = quads = _growArrayBy(quads, quads.length); quads[qlen++] = pad(currQuad, currQuadBytes); name = addName(quads, qlen, currQuadBytes);
_decodeEscaped(); break; case 2: // 2-byte UTF _skipUtf8_2(); break; case 3: // 3-byte UTF _skipUtf8_3(); break; case 4: // 4-byte UTF _skipUtf8_4(); break; default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); } else { _reportInvalidChar(c);
if (ch == '\'' && isEnabled(Feature.ALLOW_SINGLE_QUOTES)) { return _parseAposName(); if (!isEnabled(Feature.ALLOW_UNQUOTED_FIELD_NAMES)) { char c = (char) _decodeCharForError(ch); _reportUnexpectedChar(c, "was expecting double-quote to start field name"); _reportUnexpectedChar(ch, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name"); } else { if (qlen >= quads.length) { _quadBuffer = quads = _growArrayBy(quads, quads.length); if (currQuadBytes > 0) { if (qlen >= quads.length) { _quadBuffer = quads = _growArrayBy(quads, quads.length); name = addName(quads, qlen, currQuadBytes);
protected final byte[] _decodeBase64(Base64Variant b64variant) throws IOException ByteArrayBuilder builder = _getByteArrayBuilder(); bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { bits = _decodeBase64Escape(b64variant, ch, 1); builder.append(decodedData); if (b64variant.usesPadding()) { _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 2); if (!b64variant.usesPaddingChar(ch)) { if ((ch != INT_BACKSLASH) || _decodeBase64Escape(b64variant, ch, 3) != Base64Variant.BASE64_VALUE_PADDING) { throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); builder.appendTwoBytes(decodedData); if (b64variant.usesPadding()) { _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 3);
return _handleUnrecognizedCharacterEscape((char) _decodeCharForError(c)); int digit = CharTypes.charToHex(ch); if (digit < 0) { _reportUnexpectedChar(ch, "expected a hex-digit for character escape sequence");
break; bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { bits = _decodeBase64Escape(b64variant, ch, 1); buffer[outputPtr++] = (byte) decodedData; if (b64variant.usesPadding()) { _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 2); if (!b64variant.usesPaddingChar(ch)) { if ((ch != INT_BACKSLASH) || _decodeBase64Escape(b64variant, ch, 3) != Base64Variant.BASE64_VALUE_PADDING) { throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); buffer[outputPtr++] = (byte) decodedData; if (b64variant.usesPadding()) { _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 3);
/**
 * Reports an "Unrecognized token" error. The message echoes the offending
 * token, which is reconstructed heuristically: starting from
 * {@code matchedPart}, further characters are appended for as long as they
 * are Java identifier characters. Nothing fancy (nor fast).
 * <p>
 * Two safeguards apply while collecting the token:
 * <ul>
 *  <li>If input ends while reading ahead, the resulting
 *    {@link java.io.EOFException} is absorbed so that the invalid-token
 *    error is reported instead of being masked by an end-of-input failure.</li>
 *  <li>The collected token is capped in length (with {@code "..."} appended)
 *    so that a long run of identifier characters in the input cannot grow
 *    the error message without bound.</li>
 * </ul>
 *
 * @param ch first input byte that is not part of a valid token
 * @param matchedPart portion of the expected token that was matched before {@code ch}
 * @param msg description of what was expected; appended to the error message
 * @throws IOException on read failures other than end-of-input, on failures
 *    decoding a character, or as raised by {@code _reportError}
 */
protected void _reportInvalidToken(int ch, String matchedPart, String msg) throws IOException
{
    // Upper bound on how much of the offending token is echoed back in the message.
    final int maxTokenLength = 256;
    StringBuilder sb = new StringBuilder(matchedPart);

    try {
        while (true) {
            char c = (char) _decodeCharForError(ch);
            if (!Character.isJavaIdentifierPart(c)) {
                break;
            }
            sb.append(c);
            if (sb.length() >= maxTokenLength) {
                // Enough context collected; do not keep consuming (untrusted) input.
                sb.append("...");
                break;
            }
            ch = _inputData.readUnsignedByte();
        }
    } catch (java.io.EOFException ignored) {
        // Input ended in the middle of the token: fall through and report the
        // invalid token with whatever was collected, rather than the EOF.
    }
    _reportError("Unrecognized token '"+sb.toString()+"': was expecting "+msg);
}
protected final byte[] _decodeBase64(Base64Variant b64variant) throws IOException ByteArrayBuilder builder = _getByteArrayBuilder(); return builder.toByteArray(); bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { bits = _decodeBase64Escape(b64variant, ch, 1); return builder.toByteArray(); bits = _decodeBase64Escape(b64variant, ch, 2); throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); return builder.toByteArray(); bits = _decodeBase64Escape(b64variant, ch, 3);
break; bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { bits = _decodeBase64Escape(b64variant, ch, 1); break; bits = _decodeBase64Escape(b64variant, ch, 2); throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); break; bits = _decodeBase64Escape(b64variant, ch, 3);
c = _decodeEscaped(); break; case 2: // 2-byte UTF c = _decodeUtf8_2(c); break; case 3: // 3-byte UTF c = _decodeUtf8_3(c); break; case 4: // 4-byte UTF c = _decodeUtf8_4(c); default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); _reportInvalidChar(c);
_throwUnquotedSpace(ch, "name"); } else { ch = _decodeEscaped(); _quadBuffer = quads = _growArrayBy(quads, quads.length); _quadBuffer = quads = _growArrayBy(quads, quads.length); } else { if (qlen >= quads.length) { _quadBuffer = quads = _growArrayBy(quads, quads.length); _quadBuffer = quads = _growArrayBy(quads, quads.length); quads[qlen++] = pad(currQuad, currQuadBytes); name = addName(quads, qlen, currQuadBytes);
_decodeEscaped(); break; case 2: // 2-byte UTF _skipUtf8_2(); break; case 3: // 3-byte UTF _skipUtf8_3(); break; case 4: // 4-byte UTF _skipUtf8_4(); break; default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); } else { _reportInvalidChar(c);
if (ch == '\'' && isEnabled(JsonReadFeature.ALLOW_SINGLE_QUOTES)) { return _parseAposName(); if (!isEnabled(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES)) { char c = (char) _decodeCharForError(ch); _reportUnexpectedChar(c, "was expecting double-quote to start field name"); _reportUnexpectedChar(ch, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name"); } else { if (qlen >= quads.length) { _quadBuffer = quads = _growArrayBy(quads, quads.length); if (currQuadBytes > 0) { if (qlen >= quads.length) { _quadBuffer = quads = _growArrayBy(quads, quads.length); name = addName(quads, qlen, currQuadBytes);
protected final byte[] _decodeBase64(Base64Variant b64variant) throws IOException ByteArrayBuilder builder = _getByteArrayBuilder(); bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; bits = b64variant.decodeBase64Char(ch); if (bits < 0) { bits = _decodeBase64Escape(b64variant, ch, 1); builder.append(decodedData); if (b64variant.usesPadding()) { _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 2); if (!b64variant.usesPaddingChar(ch)) { if ((ch != INT_BACKSLASH) || _decodeBase64Escape(b64variant, ch, 3) != Base64Variant.BASE64_VALUE_PADDING) { throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); builder.appendTwoBytes(decodedData); if (b64variant.usesPadding()) { _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 3);
return _handleUnrecognizedCharacterEscape((char) _decodeCharForError(c)); int digit = CharTypes.charToHex(ch); if (digit < 0) { _reportUnexpectedChar(ch, "expected a hex-digit for character escape sequence");