public String createAsEncodedString(byte[] bytes, int start, int length, Encoding encoding) { // FIXME: We should be able to move some faster non-exception cache using Encoding.isDefined try { Charset charset = EncodingUtils.charsetForEncoding(getEncoding()); if (charset != null) { if (charset == RubyEncoding.UTF8) { return RubyEncoding.decodeUTF8(bytes, start, length); } else { return new String(bytes, start, length, charset); } } } catch (UnsupportedCharsetException e) {} return new String(bytes, start, length); }
public String createAsEncodedString(byte[] bytes, int start, int length, Encoding encoding) { // FIXME: We should be able to move some faster non-exception cache using Encoding.isDefined try { Charset charset = EncodingUtils.charsetForEncoding(getEncoding()); if (charset != null) { if (charset == RubyEncoding.UTF8) { return RubyEncoding.decodeUTF8(bytes, start, length); } else { return new String(bytes, start, length, charset); } } } catch (UnsupportedCharsetException e) {} return new String(bytes, start, length); }
/** * This differs from MRI in a few ways. This version does not apply value to a separate token buffer. * It is for use when we know we will not be omitting or including ant non-syntactical characters. Use * tokadd_mbchar(int, ByteList) if the string differs from actual source. Secondly, this returns a boolean * instead of the first byte passed. MRI only used the return value as a success/failure code to return * EOF. * * Because this version does not use a separate token buffer we only just increment lex_p. When we reach * end of the token it will just get the bytes directly from source directly. */ public boolean tokadd_mbchar(int first_byte) { int length = precise_mbclen(); if (length <= 0) { compile_error("invalid multibyte char (" + getEncoding() + ")"); return false; } else if (length > 1) { tokenCR = StringSupport.CR_VALID; } lex_p += length - 1; // we already read first byte so advance pointer for remainder return true; }
/** * This differs from MRI in a few ways. This version does not apply value to a separate token buffer. * It is for use when we know we will not be omitting or including ant non-syntactical characters. Use * tokadd_mbchar(int, ByteList) if the string differs from actual source. Secondly, this returns a boolean * instead of the first byte passed. MRI only used the return value as a success/failure code to return * EOF. * * Because this version does not use a separate token buffer we only just increment lex_p. When we reach * end of the token it will just get the bytes directly from source directly. */ public boolean tokadd_mbchar(int first_byte) { int length = precise_mbclen(); if (length <= 0) { compile_error("invalid multibyte char (" + getEncoding() + ")"); return false; } else if (length > 1) { tokenCR = StringSupport.CR_VALID; } lex_p += length - 1; // we already read first byte so advance pointer for remainder return true; }
/**
 * Produces a String for the source bytes running from {@code start} up to the current
 * lex position (lex_p), decoded via createAsEncodedString with the current encoding.
 *
 * @param start offset of the first byte, relative to the beginning of lexb
 * @return the decoded text of that byte range
 */
public String createTokenString(int start) {
    final byte[] source = lexb.getUnsafeBytes();
    final int offset = lexb.begin() + start;
    final int span = lex_p - start;

    return createAsEncodedString(source, offset, span, getEncoding());
}
/**
 * Produces a ByteList for the current token: the source bytes running from tokp up to
 * lex_p, tagged with the current encoding.
 *
 * @return a ByteList constructed with {@code true} as its final argument
 *         (NOTE(review): presumably the copy flag - confirm against ByteList)
 */
public ByteList createTokenByteList() {
    final byte[] source = lexb.unsafeBytes();
    final int offset = lexb.begin() + tokp;
    final int span = lex_p - tokp;

    return new ByteList(source, offset, span, getEncoding(), true);
}
/**
 * Produces a ByteList for the source bytes running from {@code start} up to the current
 * lex position (lex_p), tagged with the current encoding.
 *
 * @param start offset of the first byte, relative to the beginning of lexb
 * @return a ByteList constructed with {@code false} as its final argument
 *         (NOTE(review): presumably the copy flag, meaning the result shares the lexer's
 *         backing array - confirm against ByteList)
 */
public ByteList createTokenByteList(int start) {
    // The length must be measured from 'start' (not tokp) so the slice ends exactly at
    // lex_p, mirroring createTokenString(int); using tokp here gave a wrong length
    // whenever start != tokp.
    return new ByteList(lexb.unsafeBytes(), lexb.begin() + start, lex_p - start, getEncoding(), false);
}
/**
 * Produces a ByteList for the source bytes running from {@code start} up to the current
 * lex position (lex_p), tagged with the current encoding.
 *
 * @param start offset of the first byte, relative to the beginning of lexb
 * @return a ByteList constructed with {@code false} as its final argument
 *         (NOTE(review): presumably the copy flag, meaning the result shares the lexer's
 *         backing array - confirm against ByteList)
 */
public ByteList createTokenByteList(int start) {
    // The length must be measured from 'start' (not tokp) so the slice ends exactly at
    // lex_p, mirroring createTokenString(int); using tokp here gave a wrong length
    // whenever start != tokp.
    return new ByteList(lexb.unsafeBytes(), lexb.begin() + start, lex_p - start, getEncoding(), false);
}
/**
 * Produces a ByteList for the current token: the source bytes running from tokp up to
 * lex_p, tagged with the current encoding.
 *
 * @return a ByteList constructed with {@code true} as its final argument
 *         (NOTE(review): presumably the copy flag - confirm against ByteList)
 */
public ByteList createTokenByteList() {
    final byte[] source = lexb.unsafeBytes();
    final int offset = lexb.begin() + tokp;
    final int span = lex_p - tokp;

    return new ByteList(source, offset, span, getEncoding(), true);
}
/**
 * Produces a String for the source bytes running from {@code start} up to the current
 * lex position (lex_p), decoded via createAsEncodedString with the current encoding.
 *
 * @param start offset of the first byte, relative to the beginning of lexb
 * @return the decoded text of that byte range
 */
public String createTokenString(int start) {
    final byte[] source = lexb.getUnsafeBytes();
    final int offset = lexb.begin() + start;
    final int span = lex_p - start;

    return createAsEncodedString(source, offset, span, getEncoding());
}
public boolean tokadd_mbchar(int first_byte, ByteList buffer) { int length = precise_mbclen(); if (length <= 0) { compile_error("invalid multibyte char (" + getEncoding() + ")"); return false; } tokAdd(first_byte, buffer); // add first byte since we have it. lex_p += length - 1; // we already read first byte so advance pointer for remainder if (length > 1) tokCopy(length - 1, buffer); // copy next n bytes over. return true; }
public boolean tokadd_mbchar(int first_byte, ByteList buffer) { int length = precise_mbclen(); if (length <= 0) { compile_error("invalid multibyte char (" + getEncoding() + ")"); return false; } tokAdd(first_byte, buffer); // add first byte since we have it. lex_p += length - 1; // we already read first byte so advance pointer for remainder if (length > 1) tokCopy(length - 1, buffer); // copy next n bytes over. return true; }
public void setRegexpEncoding(Ruby runtime, ByteList value, RegexpOptions options) { Encoding optionsEncoding = options.setup(runtime); // Change encoding to one specified by regexp options as long as the string is compatible. if (optionsEncoding != null) { if (optionsEncoding != value.getEncoding() && !is7BitASCII(value)) { mismatchedRegexpEncodingError(optionsEncoding, value.getEncoding()); } value.setEncoding(optionsEncoding); } else if (options.isEncodingNone()) { if (value.getEncoding() != ASCII8BIT_ENCODING && !is7BitASCII(value)) { mismatchedRegexpEncodingError(optionsEncoding, value.getEncoding()); } value.setEncoding(ASCII8BIT_ENCODING); } else if (getEncoding() == USASCII_ENCODING) { if (!is7BitASCII(value)) { value.setEncoding(USASCII_ENCODING); // This will raise later } else { value.setEncoding(ASCII8BIT_ENCODING); } } }
public void setRegexpEncoding(Ruby runtime, ByteList value, RegexpOptions options) { Encoding optionsEncoding = options.setup(runtime); // Change encoding to one specified by regexp options as long as the string is compatible. if (optionsEncoding != null) { if (optionsEncoding != value.getEncoding() && !is7BitASCII(value)) { mismatchedRegexpEncodingError(optionsEncoding, value.getEncoding()); } value.setEncoding(optionsEncoding); } else if (options.isEncodingNone()) { if (value.getEncoding() != ASCII8BIT_ENCODING && !is7BitASCII(value)) { mismatchedRegexpEncodingError(optionsEncoding, value.getEncoding()); } value.setEncoding(ASCII8BIT_ENCODING); } else if (getEncoding() == USASCII_ENCODING) { if (!is7BitASCII(value)) { value.setEncoding(USASCII_ENCODING); // This will raise later } else { value.setEncoding(ASCII8BIT_ENCODING); } } }