/**
 * Creates a {@link UTF8StreamJsonParser} that reads from the given input stream
 * and uses the supplied {@code buf} as its input buffer.
 * <p>
 * Parser features, the codec and the root byte-symbol table are all taken from
 * {@code DEFAULT_JSON_FACTORY}; a child symbol table is created for the new
 * parser via {@code makeChild(1)}.
 *
 * @param in the stream handed to the parser as its input source
 * @param buf the byte array handed to the parser as its input buffer
 * @param offset forwarded unchanged to the parser constructor
 *        (presumably the start position within {@code buf}; confirm against the constructor)
 * @param limit forwarded unchanged to the parser constructor
 *        (presumably the end position within {@code buf}; confirm against the constructor)
 * @param bufferRecyclable forwarded unchanged to the parser constructor
 * @param context the I/O context the parser is constructed with
 * @return the newly constructed parser
 * @throws IOException declared by this factory method
 */
static UTF8StreamJsonParser newJsonParser(InputStream in, byte[] buf,
        int offset, int limit, boolean bufferRecyclable, IOContext context)
        throws IOException
{
    return new UTF8StreamJsonParser(context,
            DEFAULT_JSON_FACTORY.getParserFeatures(), in,
            DEFAULT_JSON_FACTORY.getCodec(),
            DEFAULT_JSON_FACTORY.getRootByteSymbols().makeChild(1), buf,
            offset, limit, bufferRecyclable);
}
protected final boolean _loadMore() throws IOException { final int bufSize = _inputEnd; _currInputProcessed += _inputEnd; _currInputRowStart -= _inputEnd; // 26-Nov-2015, tatu: Since name-offset requires it too, must offset // this increase to avoid "moving" name-offset, resulting most likely // in negative value, which is fine as combine value remains unchanged. _nameStartOffset -= bufSize; if (_inputStream != null) { int space = _inputBuffer.length; if (space == 0) { // only occurs when we've been closed return false; } int count = _inputStream.read(_inputBuffer, 0, space); if (count > 0) { _inputPtr = 0; _inputEnd = count; return true; } // End of input _closeInput(); // Should never return 0, so let's fail if (count == 0) { throw new IOException("InputStream.read() returned 0 characters when trying to read "+_inputBuffer.length+" bytes"); } } return false; }
@Override public byte[] getBinaryValue(Base64Variant b64variant) throws IOException { if (_currToken != JsonToken.VALUE_STRING && (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT || _binaryValue == null)) { _reportError("Current token ("+_currToken+") not VALUE_STRING or VALUE_EMBEDDED_OBJECT, can not access as binary"); } // To ensure that we won't see inconsistent data, better clear up state... if (_tokenIncomplete) { try { _binaryValue = _decodeBase64(b64variant); } catch (IllegalArgumentException iae) { throw _constructError("Failed to decode VALUE_STRING as base64 ("+b64variant+"): "+iae.getMessage()); } // let's clear incomplete only now; allows for accessing other textual content in error cases _tokenIncomplete = false; } else { // may actually require conversion... if (_binaryValue == null) { @SuppressWarnings("resource") ByteArrayBuilder builder = _getByteArrayBuilder(); _decodeBase64(getText(), builder, b64variant); _binaryValue = builder.toByteArray(); } } return _binaryValue; }
private final void _checkMatchEnd(String matchStr, int i, int ch) throws IOException { // but actually only alphanums are problematic char c = (char) _decodeCharForError(ch); if (Character.isJavaIdentifierPart(c)) { _reportInvalidToken(matchStr.substring(0, i)); } }
/**
 * Skips white space and comments, loading more input as needed, and returns
 * the first byte (as an unsigned value) that is neither.
 * <p>
 * Line feeds update the row counter and row start; carriage returns are
 * delegated to {@code _skipCR()}; any other value below space except tab is
 * passed to {@code _throwInvalidSpace}. A slash always starts comment
 * skipping; a hash is skipped as a comment only when
 * {@code _skipYAMLComment()} returns {@code true}, otherwise it is returned.
 *
 * @return the next significant byte, in the range 0-255
 * @throws IOException if input ends before a significant byte is found, or on read failure
 */
private final int _skipWS2() throws IOException
{
    while (_inputPtr < _inputEnd || _loadMore()) {
        final int b = _inputBuffer[_inputPtr++] & 0xFF;

        if (b == INT_SPACE) {
            continue;
        }

        if (b < INT_SPACE) {
            // control characters: only LF, CR and TAB are acceptable
            if (b == INT_LF) {
                ++_currInputRow;
                _currInputRowStart = _inputPtr;
            } else if (b == INT_CR) {
                _skipCR();
            } else if (b != INT_TAB) {
                _throwInvalidSpace(b);
            }
            continue;
        }

        // above space: either a comment start or the byte we are looking for
        if (b == INT_SLASH) {
            _skipComment();
            continue;
        }
        if (b == INT_HASH && _skipYAMLComment()) {
            continue;
        }
        return b;
    }
    throw _constructError("Unexpected end-of-input within/between "+_parsingContext.typeDesc()+" entries");
}
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); while (true) { if (_inputPtr >= _inputEnd) { _loadMoreGuaranteed(); outBuf = _textBuffer.finishCurrentSegment(); outPtr = 0; c = _decodeEscaped(); break; case 2: // 2-byte UTF c = _decodeUtf8_2(c); break; case 3: // 3-byte UTF if ((_inputEnd - _inputPtr) >= 2) { c = _decodeUtf8_3fast(c); } else { c = _decodeUtf8_3(c); c = _decodeUtf8_4(c); default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); _reportInvalidChar(c);
if (ch == INT_APOS && isEnabled(Feature.ALLOW_SINGLE_QUOTES)) { return _parseAposName(); if (!isEnabled(Feature.ALLOW_UNQUOTED_FIELD_NAMES)) { char c = (char) _decodeCharForError(ch); _reportUnexpectedChar(c, "was expecting double-quote to start field name"); _reportUnexpectedChar(ch, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name"); } else { if (qlen >= quads.length) { _quadBuffer = quads = growArrayBy(quads, quads.length); if (!_loadMore()) { _reportInvalidEOF(" in field name", JsonToken.FIELD_NAME); _quadBuffer = quads = growArrayBy(quads, quads.length); String name = _symbols.findName(quads, qlen); if (name == null) { name = addName(quads, qlen, currQuadBytes);
_throwUnquotedSpace(ch, "name"); } else { ch = _decodeEscaped(); _quadBuffer = quads = growArrayBy(quads, quads.length); _quadBuffer = quads = growArrayBy(quads, quads.length); } else { if (qlen >= quads.length) { _quadBuffer = quads = growArrayBy(quads, quads.length); if (!_loadMore()) { _reportInvalidEOF(" in field name", JsonToken.FIELD_NAME); _quadBuffer = quads = growArrayBy(quads, quads.length); quads[qlen++] = _padLastQuad(currQuad, currQuadBytes); String name = _symbols.findName(quads, qlen); if (name == null) { name = addName(quads, qlen, currQuadBytes);
/**
 * Consumes white space and comments until a significant byte is found and
 * returns that byte as an unsigned value, refilling the buffer via
 * {@code loadMore()} when it runs out.
 * <p>
 * A slash triggers {@code _skipComment()}; a hash is treated as a comment
 * only if {@code _skipYAMLComment()} returns {@code true}. Line feeds
 * advance the row bookkeeping, carriage returns go through {@code _skipCR()},
 * and any other value below space except tab is handed to
 * {@code _throwInvalidSpace}.
 *
 * @return the next significant byte, in the range 0-255
 * @throws IOException if input is exhausted first, or on read failure
 */
private final int _skipWS2() throws IOException
{
    while (true) {
        if (_inputPtr >= _inputEnd && !loadMore()) {
            throw _constructError("Unexpected end-of-input within/between "+_parsingContext.getTypeDesc()+" entries");
        }
        final int ch = _inputBuffer[_inputPtr++] & 0xFF;

        if (ch > INT_SPACE) {
            final boolean skipped;
            if (ch == INT_SLASH) {
                _skipComment();
                skipped = true;
            } else if (ch == INT_HASH) {
                skipped = _skipYAMLComment();
            } else {
                skipped = false;
            }
            if (skipped) {
                continue;
            }
            return ch;
        }

        if (ch == INT_SPACE) {
            continue;
        }
        if (ch == INT_LF) {
            ++_currInputRow;
            _currInputRowStart = _inputPtr;
        } else if (ch == INT_CR) {
            _skipCR();
        } else if (ch != INT_TAB) {
            _throwInvalidSpace(ch);
        }
    }
}
int max = _inputEnd; if (ptr >= max) { _loadMoreGuaranteed(); ptr = _inputPtr; max = _inputEnd; _decodeEscaped(); break; case 2: // 2-byte UTF _skipUtf8_2(); break; case 3: // 3-byte UTF _skipUtf8_3(); break; case 4: // 4-byte UTF _skipUtf8_4(c); break; default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); } else { _reportInvalidChar(c);
do { if (_inputPtr >= _inputEnd) { _loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; _loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 1); _loadMoreGuaranteed(); _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 2); _loadMoreGuaranteed(); if (_decodeBase64Escape(b64variant, ch, 3) != Base64Variant.BASE64_VALUE_PADDING) { throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); _loadMoreGuaranteed(); _handleBase64MissingPadding(b64variant); bits = _decodeBase64Escape(b64variant, ch, 3);
loadMoreGuaranteed(); case INT_TAB: case INT_CR: _skipCR(); break; case INT_LF: _skipLF(); break; case INT_SLASH: _skipComment(); break; default: if (i < INT_SPACE) { _throwInvalidSpace(i); loadMoreGuaranteed(); _reportUnexpectedChar(i, "was expecting a colon to separate field name and value"); while (_inputPtr < _inputEnd || loadMore()) { i = _inputBuffer[_inputPtr++] & 0xFF; if (i > INT_SPACE) { _skipComment(); } else if (i != INT_SPACE) { if (i == INT_LF) { _skipLF(); } else if (i == INT_CR) {
int max = _inputEnd; if (ptr >= max) { loadMoreGuaranteed(); ptr = _inputPtr; max = _inputEnd; _decodeEscaped(); break; case 2: // 2-byte UTF _skipUtf8_2(c); break; case 3: // 3-byte UTF _skipUtf8_3(c); break; case 4: // 4-byte UTF _skipUtf8_4(c); break; default: if (c < INT_SPACE) { _throwUnquotedSpace(c, "string value"); } else { _reportInvalidChar(c);
/**
 * Skips white space and slash-introduced comments, refilling the buffer via
 * {@code loadMore()} as needed, and returns the first other byte as an
 * unsigned value.
 * <p>
 * Line feeds and carriage returns are delegated to {@code _skipLF()} and
 * {@code _skipCR()}; any other value below space except tab is passed to
 * {@code _throwInvalidSpace}.
 *
 * @return the next significant byte, in the range 0-255
 * @throws IOException if input ends before a significant byte is found, or on read failure
 * @throws JsonParseException declared alongside {@code IOException} by this method
 */
private final int _skipWS() throws IOException, JsonParseException
{
    while (_inputPtr < _inputEnd || loadMore()) {
        final int b = _inputBuffer[_inputPtr++] & 0xFF;

        if (b > INT_SPACE) {
            if (b == INT_SLASH) {
                _skipComment();
                continue;
            }
            return b;
        }

        if (b == INT_SPACE) {
            continue;
        }
        if (b == INT_LF) {
            _skipLF();
        } else if (b == INT_CR) {
            _skipCR();
        } else if (b != INT_TAB) {
            _throwInvalidSpace(b);
        }
    }
    throw _constructError("Unexpected end-of-input within/between "+_parsingContext.getTypeDesc()+" entries");
}
protected final byte[] _decodeBase64(Base64Variant b64variant) throws IOException ByteArrayBuilder builder = _getByteArrayBuilder(); do { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 1); loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 2); loadMoreGuaranteed(); throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 3);
if (!_loadMore()) { _reportInvalidEOF(" in character escape sequence", JsonToken.VALUE_STRING); return _handleUnrecognizedCharacterEscape((char) _decodeCharForError(c)); for (int i = 0; i < 4; ++i) { if (_inputPtr >= _inputEnd) { if (!_loadMore()) { _reportInvalidEOF(" in character escape sequence", JsonToken.VALUE_STRING); int digit = CharTypes.charToHex(ch); if (digit < 0) { _reportUnexpectedChar(ch, "expected a hex-digit for character escape sequence");
protected void _reportInvalidToken(String matchedPart, String msg) throws IOException { /* Let's just try to find what appears to be the token, using * regular Java identifier character rules. It's just a heuristic, * nothing fancy here (nor fast). */ StringBuilder sb = new StringBuilder(matchedPart); while ((_inputPtr < _inputEnd) || _loadMore()) { int i = (int) _inputBuffer[_inputPtr++]; char c = (char) _decodeCharForError(i); if (!Character.isJavaIdentifierPart(c)) { // 11-Jan-2016, tatu: note: we will fully consume the character, // included or not, so if recovery was possible, it'd be off-by-one... break; } sb.append(c); if (sb.length() >= MAX_ERROR_TOKEN_LENGTH) { sb.append("..."); break; } } _reportError("Unrecognized token '%s': was expecting %s", sb, msg); }
do { if (_inputPtr >= _inputEnd) { loadMoreGuaranteed(); break; bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 1); loadMoreGuaranteed(); break; bits = _decodeBase64Escape(b64variant, ch, 2); loadMoreGuaranteed(); throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 3);
do { if (_inputPtr >= _inputEnd) { _loadMoreGuaranteed(); break; bits = _decodeBase64Escape(b64variant, ch, 0); if (bits < 0) { // white space to skip continue; _loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 1); _loadMoreGuaranteed(); break; bits = _decodeBase64Escape(b64variant, ch, 2); _loadMoreGuaranteed(); throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); _loadMoreGuaranteed(); bits = _decodeBase64Escape(b64variant, ch, 3);
/**
 * Reports an unrecognized token, first extending {@code matchedPart} with
 * the identifier-like characters that follow it in the input so the error
 * message shows the offending token.
 * <p>
 * This is only a heuristic based on Java identifier character rules;
 * it is neither fancy nor fast. Collection stops at the first
 * non-identifier character, at end of input, or once the collected text
 * reaches a fixed cap, in which case "..." is appended. Without the cap,
 * malformed input containing a very long identifier-like run would be
 * copied in full into the exception message.
 *
 * @param matchedPart the part of the token already consumed by the caller
 * @param msg description of what was expected instead
 * @throws IOException on read failure, or as raised by {@code _reportError}
 */
protected void _reportInvalidToken(String matchedPart, String msg) throws IOException
{
    // Upper bound on how much of the bad token is echoed back in the message;
    // same limit (256) as the bounded variant of this method uses.
    final int maxTokenLength = 256;

    StringBuilder sb = new StringBuilder(matchedPart);
    while (_inputPtr < _inputEnd || loadMore()) {
        int i = (int) _inputBuffer[_inputPtr++];
        char c = (char) _decodeCharForError(i);
        if (!Character.isJavaIdentifierPart(c)) {
            break;
        }
        sb.append(c);
        if (sb.length() >= maxTokenLength) {
            // truncate rather than buffer an unbounded amount of input
            sb.append("...");
            break;
        }
    }
    _reportError("Unrecognized token '"+sb.toString()+"': was expecting "+msg);
}