/**
 * Compares two length-prefixed UTF-8 strings given only their start offsets.
 * <p>
 * For each side the payload length is read with {@code getUTFLength}, the length header
 * ({@code getNumBytesToStoreLength} bytes) is skipped, and the work is handed to the
 * overload that takes explicit payload offsets and lengths.
 *
 * @param thisBytes    buffer holding the left-hand string
 * @param thisStart    offset of the left-hand string's length header
 * @param thatBytes    buffer holding the right-hand string
 * @param thatStart    offset of the right-hand string's length header
 * @param useLowerCase forwarded unchanged to the delegate overload
 * @param useRawByte   forwarded unchanged to the delegate overload
 * @return the result of the delegate comparison
 */
private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart,
        boolean useLowerCase, boolean useRawByte) {
    // Left-hand side: payload length, then the offset just past its header.
    final int leftLength = getUTFLength(thisBytes, thisStart);
    final int leftPayloadStart = thisStart + getNumBytesToStoreLength(leftLength);

    // Right-hand side: same decoding.
    final int rightLength = getUTFLength(thatBytes, thatStart);
    final int rightPayloadStart = thatStart + getNumBytesToStoreLength(rightLength);

    return compareTo(thisBytes, leftPayloadStart, leftLength, thatBytes, rightPayloadStart, rightLength,
            useLowerCase, useRawByte);
}
/**
 * Re-initializes the cached length metadata after the underlying bytes change.
 * <p>
 * {@code utf8Length} and {@code metaLength} are used often, so both are computed eagerly here.
 * {@code hashValue} and {@code stringLength} are put back to their "not yet computed"
 * markers ({@code 0} and {@code -1}) and are lazily initialized after the first call
 * that needs them.
 */
@Override
protected void afterReset() {
    // Invalidate the lazily computed values.
    stringLength = -1;
    hashValue = 0;

    // Eagerly decode the payload length, then derive the header size from it.
    utf8Length = UTF8StringUtil.getUTFLength(bytes, start);
    metaLength = UTF8StringUtil.getNumBytesToStoreLength(getUTF8Length());
}
/**
 * Returns the number of bytes taken by an open field's name, header included.
 * <p>
 * The name is located with {@code getOpenFieldNameOffset}; the result is the payload
 * length reported by {@code getUTFLength} plus the number of bytes used to store that length.
 *
 * @param recordType record type passed through to {@code getOpenFieldNameOffset}
 * @param fieldId    field identifier passed through to {@code getOpenFieldNameOffset}
 * @return payload length plus length-header size, in bytes
 */
public int getOpenFieldNameSize(ARecordType recordType, int fieldId) {
    final int nameOffset = getOpenFieldNameOffset(recordType, fieldId);
    final int payloadBytes = UTF8StringUtil.getUTFLength(bytes, nameOffset);
    final int headerBytes = UTF8StringUtil.getNumBytesToStoreLength(payloadBytes);
    return payloadBytes + headerBytes;
}
/**
 * Counts the characters in a length-prefixed UTF-8 string.
 * <p>
 * Walks the payload from just after the length header to its end, advancing by
 * {@code charSize} at each position and counting one character per step.
 *
 * @param b buffer containing the string
 * @param s offset of the string's length header in {@code b}
 * @return the number of encoded characters in the payload
 */
public static int getStringLength(byte[] b, int s) {
    final int payloadBytes = getUTFLength(b, s);
    final int payloadStart = s + getNumBytesToStoreLength(payloadBytes);
    final int payloadEnd = payloadStart + payloadBytes;

    int count = 0;
    for (int cursor = payloadStart; cursor < payloadEnd; cursor += charSize(b, cursor)) {
        count++;
    }
    return count;
}
/**
 * Hashes a length-prefixed UTF-8 string given the offset of its length header.
 * <p>
 * Decodes the payload length, skips the header, and delegates to the overload that
 * takes an explicit payload offset and length.
 *
 * @param bytes        buffer containing the string
 * @param start        offset of the string's length header
 * @param useLowerCase forwarded unchanged to the delegate overload
 * @param useRawByte   forwarded unchanged to the delegate overload
 * @param coefficient  forwarded unchanged to the delegate overload
 * @param r            forwarded unchanged to the delegate overload
 * @return the hash computed by the delegate overload
 */
private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient,
        int r) {
    final int payloadBytes = getUTFLength(bytes, start);
    final int payloadStart = start + getNumBytesToStoreLength(payloadBytes);
    return hash(bytes, payloadStart, payloadBytes, useLowerCase, useRawByte, coefficient, r);
}
/**
 * Writes a length-prefixed UTF-8 string to {@code os} as a double-quoted CSV field.
 * <p>
 * The payload is emitted between two {@code '"'} bytes, and every {@code '"'} inside the
 * payload is doubled. Each character is copied using its stored bytes, so multi-byte
 * characters reach the stream intact.
 *
 * @param b  buffer containing the string
 * @param s  offset of the string's length header in {@code b}
 * @param l  unused by this method; kept for signature compatibility
 * @param os destination stream
 * @throws IOException if writing to {@code os} fails
 */
public static void writeUTF8StringAsCSV(byte[] b, int s, int l, OutputStream os) throws IOException {
    int stringLength = UTF8StringUtil.getUTFLength(b, s);
    int position = s + UTF8StringUtil.getNumBytesToStoreLength(stringLength);
    int maxPosition = position + stringLength;
    os.write('"');
    while (position < maxPosition) {
        char c = UTF8StringUtil.charAt(b, position);
        int sz = UTF8StringUtil.charSize(b, position);
        if (c == '"') {
            // CSV escaping: an embedded quote is written twice.
            os.write('"');
        }
        // Copy the character's encoded bytes. OutputStream.write(int) keeps only the low
        // 8 bits of its argument, so writing the decoded char would corrupt any
        // character that occupies more than one byte.
        os.write(b, position, sz);
        position += sz;
    }
    os.write('"');
}
/**
 * Appends the characters of a length-prefixed UTF-8 string to {@code builder}.
 * <p>
 * Characters are decoded one at a time with {@code charAt}; the cursor advances by
 * {@code getModifiedUTF8Len} of the decoded character until the payload byte count
 * reported by {@code getUTFLength} is used up.
 *
 * @param builder destination for the decoded characters
 * @param bytes   buffer containing the string
 * @param start   offset of the string's length header in {@code bytes}
 * @return the same {@code builder} instance
 */
public static StringBuilder toString(StringBuilder builder, byte[] bytes, int start) {
    final int payloadBytes = getUTFLength(bytes, start);
    int cursor = start + getNumBytesToStoreLength(payloadBytes);

    for (int remaining = payloadBytes; remaining > 0;) {
        final char decoded = charAt(bytes, cursor);
        builder.append(decoded);

        final int width = getModifiedUTF8Len(decoded);
        cursor += width;
        remaining -= width;
    }
    return builder;
}
private static void printUTF8String(byte[] b, int s, int l, OutputStream os, boolean useQuotes) throws IOException { int stringLength = getUTFLength(b, s); int position = s + getNumBytesToStoreLength(stringLength); int maxPosition = position + stringLength; if (useQuotes) { os.write('\"'); } while (position < maxPosition) { char c = charAt(b, position); switch (c) { // escape case '\\': case '"': os.write('\\'); break; } int sz = charSize(b, position); while (sz > 0) { os.write(b[position]); position++; sz--; } } if (useQuotes) { os.write('\"'); } }
/**
 * Points this object at a new length-prefixed UTF-8 string and refreshes the cached lengths.
 * <p>
 * Stores the buffer and offset, caches the character count, the payload byte length and
 * the length-header size, then calls the no-argument {@code reset()}.
 *
 * @param data     buffer containing the string
 * @param startOff offset of the string's length header in {@code data}
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public void reset(byte[] data, int startOff) throws HyracksDataException {
    this.data = data;
    this.startOffset = startOff;

    // Character count first, then byte length and the header size derived from it.
    this.length = UTF8StringUtil.getStringLength(data, startOff);
    final int payloadBytes = UTF8StringUtil.getUTFLength(data, startOff);
    this.utfByteLength = payloadBytes;
    this.metaLength = UTF8StringUtil.getNumBytesToStoreLength(payloadBytes);

    reset();
}
@Override public void reset(byte[] sentenceData, int start, int length) { this.sentenceBytes = sentenceData; this.sentenceStartOffset = start; this.sentenceEndOffset = length + start; byteIndex = this.sentenceStartOffset; if (sourceHasTypeTag) { byteIndex++; // skip type tag } sentenceUtf8Length = UTF8StringUtil.getUTFLength(sentenceData, byteIndex); byteIndex += UTF8StringUtil.getNumBytesToStoreLength(sentenceUtf8Length); // skip utf8 length indicator if (!ignoreTokenCount) { tokensStart.reset(); tokensLength.reset(); } } }
/** * Compute the normalized key of the UTF8 string. * The normalized key in Hyracks is mainly used to speedup the comparison between pointable data. * In the UTF8StringPTR case, we compute the integer value by using the first 2 chars. * The comparator will first use this integer to get the result ( <,>, or =), it will check * the actual bytes only if the normalized key is equal. Thus this normalized key must be * consistent with the comparison result. */ public static int normalize(byte[] bytes, int start) { int len = getUTFLength(bytes, start); long nk = 0; int offset = start + getNumBytesToStoreLength(len); for (int i = 0; i < 2; ++i) { nk <<= 16; if (i < len) { nk += (charAt(bytes, offset)) & 0xffff; offset += charSize(bytes, offset); } } return (int) (nk >> 1); // make it always positive. }
public static void writeUTF8StringAsJSON(byte[] b, int s, int l, OutputStream os) throws IOException { int utfLength = UTF8StringUtil.getUTFLength(b, s); int position = s + UTF8StringUtil.getNumBytesToStoreLength(utfLength); // skip 2 bytes containing string size int maxPosition = position + utfLength;
int utfLength = UTF8StringUtil.getUTFLength(sentenceBytes, tmpIndex);
/**
 * Walks the encoded form of {@code STRING_UTF8_MIX} and checks, for every character, that
 * {@code charAt} decodes the expected character and that {@code charSize} agrees with
 * {@code getModifiedUTF8Len}.
 */
@Test
public void testCharAtCharSizeGetLen() throws Exception {
    byte[] encoded = writeStringToBytes(STRING_UTF8_MIX);
    char[] expectedChars = STRING_UTF8_MIX.toCharArray();

    // Start just past the length header.
    int cursor = getNumBytesToStoreLength(getUTFLength(encoded, 0));
    for (int i = 0; i < expectedChars.length; i++) {
        char expected = expectedChars[i];
        assertEquals(expected, charAt(encoded, cursor));

        int width = charSize(encoded, cursor);
        assertEquals(getModifiedUTF8Len(expected), width);
        cursor += width;
    }
}
int fieldUtflength = UTF8StringUtil.getUTFLength(fieldName, nstart + 1); int fieldUtfMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fieldUtflength); int fieldNameHashCode = nameHashFunction.hash(fieldName, nstart + 1, fieldUtflength + fieldUtfMetaLen);
int len = UTF8StringUtil.getUTFLength(serNonTaggedAObject, offset + 1); return len + UTF8StringUtil.getNumBytesToStoreLength(len); } else { int len = UTF8StringUtil.getUTFLength(serNonTaggedAObject, offset); return len + UTF8StringUtil.getNumBytesToStoreLength(len);
.getNumBytesToStoreLength(UTF8StringUtil.getUTFLength(arg1.getByteArray(), arg1.getStartOffset())); int startOffset = arg1.getStartOffset() + numBytesToStoreLength; int length = arg1.getLength() - numBytesToStoreLength;
.getNumBytesToStoreLength(UTF8StringUtil.getUTFLength(queryArray, queryArrayStartOffset)); queryArrayStartOffset = queryArrayStartOffset + numBytesToStoreLength; queryArrayLength = queryArrayLength - numBytesToStoreLength; UTF8StringUtil.getUTFLength(tokenizerForRightArray.getToken().getData(), tokenizerForRightArray.getToken().getStartOffset())); tokenOffset = tokenOffset + numBytesToStoreLength;