org.apache.hyracks.util.string.UTF8StringUtil.getUTFLength java code examples

/**
 * Compares two length-prefixed UTF-8 strings that live inside byte arrays.
 * <p>
 * Each string starts with an encoded byte length; this overload decodes that prefix for both sides,
 * skips over it, and hands the payload ranges to the range-based {@code compareTo} overload.
 *
 * @param thisBytes    array holding the left-hand string
 * @param thisStart    offset of the left-hand string's length prefix
 * @param thatBytes    array holding the right-hand string
 * @param thatStart    offset of the right-hand string's length prefix
 * @param useLowerCase forwarded unchanged to the range-based overload
 * @param useRawByte   forwarded unchanged to the range-based overload
 * @return whatever the range-based overload returns for the two payloads
 */
private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart, boolean useLowerCase,
    boolean useRawByte) {
  final int lhsByteLength = getUTFLength(thisBytes, thisStart);
  final int rhsByteLength = getUTFLength(thatBytes, thatStart);
  // The payload begins right after the variable-size length prefix.
  final int lhsPayloadStart = thisStart + getNumBytesToStoreLength(lhsByteLength);
  final int rhsPayloadStart = thatStart + getNumBytesToStoreLength(rhsByteLength);
  return compareTo(thisBytes, lhsPayloadStart, lhsByteLength, thatBytes, rhsPayloadStart, rhsByteLength,
      useLowerCase, useRawByte);
}

/**
 * Refreshes the cached length metadata once {@code bytes} / {@code start} have been reset.
 * <p>
 * {@code utf8Length} and {@code metaLength} are read frequently, so both are computed eagerly here.
 * {@code hashValue} and {@code stringLength} are merely cleared (to {@code 0} and {@code -1}); they are
 * lazily initialized the first time they are requested.
 */
@Override
protected void afterReset() {
  // Eagerly cached: byte length of the payload, then the size of the prefix that encodes it.
  this.utf8Length = UTF8StringUtil.getUTFLength(bytes, start);
  this.metaLength = UTF8StringUtil.getNumBytesToStoreLength(getUTF8Length());

  // Lazily cached: drop whatever was computed for the previous content.
  this.hashValue = 0;
  this.stringLength = -1;
}

/**
 * Returns the number of bytes occupied by the serialized name of an open field, i.e. the bytes of the
 * length prefix plus the bytes of the name itself.
 *
 * @param recordType passed through to {@code getOpenFieldNameOffset}
 * @param fieldId    passed through to {@code getOpenFieldNameOffset}
 * @return length-prefix size plus UTF-8 byte length of the field name
 */
public int getOpenFieldNameSize(ARecordType recordType, int fieldId) {
  final int nameOffset = getOpenFieldNameOffset(recordType, fieldId);
  final int nameByteLength = UTF8StringUtil.getUTFLength(bytes, nameOffset);
  final int lengthPrefixSize = UTF8StringUtil.getNumBytesToStoreLength(nameByteLength);
  return nameByteLength + lengthPrefixSize;
}

/**
 * Counts the characters of a length-prefixed UTF-8 string.
 * <p>
 * The byte length is decoded from the prefix at {@code s}; the payload is then walked one encoded
 * character at a time (advancing by {@code charSize}) until the end of the payload is reached.
 *
 * @param b array holding the string
 * @param s offset of the string's length prefix
 * @return number of encoded characters in the payload
 */
public static int getStringLength(byte[] b, int s) {
  final int byteLength = getUTFLength(b, s);
  final int payloadStart = s + getNumBytesToStoreLength(byteLength);
  final int payloadEnd = payloadStart + byteLength;
  int chars = 0;
  for (int cursor = payloadStart; cursor < payloadEnd; cursor += charSize(b, cursor)) {
    chars++;
  }
  return chars;
}

/**
 * Hashes a length-prefixed UTF-8 string by decoding its length prefix and delegating to the
 * range-based {@code hash} overload with the payload's start offset and byte length.
 * All remaining arguments are forwarded unchanged.
 *
 * @param bytes array holding the string
 * @param start offset of the string's length prefix
 * @return the value produced by the range-based overload
 */
private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient, int r) {
  final int payloadLength = getUTFLength(bytes, start);
  final int prefixSize = getNumBytesToStoreLength(payloadLength);
  return hash(bytes, start + prefixSize, payloadLength, useLowerCase, useRawByte, coefficient, r);
}

/**
 * Writes a length-prefixed UTF-8 string to {@code os} as a double-quoted CSV field.
 * <p>
 * The field is wrapped in {@code "} and every embedded {@code "} is doubled. Each character is emitted
 * by copying its encoded bytes straight from the source array. The previous implementation passed the
 * decoded {@code char} to {@link OutputStream#write(int)}, which writes only the low-order 8 bits and
 * therefore corrupted every character outside the single-byte range.
 *
 * @param b  array holding the string
 * @param s  offset of the string's length prefix
 * @param l  unused by this method; the length is taken from the prefix at {@code s}
 * @param os destination stream
 * @throws IOException if writing to {@code os} fails
 */
public static void writeUTF8StringAsCSV(byte[] b, int s, int l, OutputStream os) throws IOException {
  int stringLength = UTF8StringUtil.getUTFLength(b, s);
  int position = s + UTF8StringUtil.getNumBytesToStoreLength(stringLength);
  int maxPosition = position + stringLength;
  os.write('"');
  while (position < maxPosition) {
    char c = UTF8StringUtil.charAt(b, position);
    int sz = UTF8StringUtil.charSize(b, position);
    if (c == '"') {
      // CSV escapes a quote inside a quoted field by doubling it.
      os.write('"');
    }
    // Copy all sz encoded bytes; os.write(c) would keep only the lowest byte of the char.
    os.write(b, position, sz);
    position += sz;
  }
  os.write('"');
}

/**
 * Decodes a length-prefixed UTF-8 string and appends its characters to {@code builder}.
 * <p>
 * Decoding proceeds character by character; after each append the cursor moves forward, and the
 * remaining byte budget shrinks, by {@code getModifiedUTF8Len} of the decoded character.
 *
 * @param builder destination for the decoded characters
 * @param bytes   array holding the string
 * @param start   offset of the string's length prefix
 * @return the same {@code builder} instance, for chaining
 */
public static StringBuilder toString(StringBuilder builder, byte[] bytes, int start) {
  int remainingBytes = getUTFLength(bytes, start);
  int cursor = start + getNumBytesToStoreLength(remainingBytes);
  while (remainingBytes > 0) {
    final char decoded = charAt(bytes, cursor);
    builder.append(decoded);
    final int width = getModifiedUTF8Len(decoded);
    cursor += width;
    remainingBytes -= width;
  }
  return builder;
}

/**
 * Writes the payload of a length-prefixed UTF-8 string to {@code os}, backslash-escaping every
 * backslash and double quote, and optionally wrapping the output in double quotes.
 * <p>
 * Characters are copied as their original encoded bytes, one byte at a time.
 *
 * @param b         array holding the string
 * @param s         offset of the string's length prefix
 * @param l         unused by this method; the length is taken from the prefix at {@code s}
 * @param os        destination stream
 * @param useQuotes whether to emit a leading and trailing {@code "}
 * @throws IOException if writing to {@code os} fails
 */
private static void printUTF8String(byte[] b, int s, int l, OutputStream os, boolean useQuotes) throws IOException {
  final int byteLength = getUTFLength(b, s);
  final int payloadStart = s + getNumBytesToStoreLength(byteLength);
  final int payloadEnd = payloadStart + byteLength;
  if (useQuotes) {
    os.write('\"');
  }
  int cursor = payloadStart;
  while (cursor < payloadEnd) {
    final char current = charAt(b, cursor);
    // Backslash and double quote are the only characters that get an escape prefix.
    if (current == '\\' || current == '"') {
      os.write('\\');
    }
    final int charEnd = cursor + charSize(b, cursor);
    for (; cursor < charEnd; cursor++) {
      os.write(b[cursor]);
    }
  }
  if (useQuotes) {
    os.write('\"');
  }
}

/**
 * Points this object at a length-prefixed UTF-8 string and caches its measurements before running
 * the no-argument {@code reset()}.
 * <p>
 * Cached values: the result of {@code getStringLength}, the byte length decoded from the prefix,
 * and the number of bytes the prefix itself occupies.
 *
 * @param data     array holding the string
 * @param startOff offset of the string's length prefix
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public void reset(byte[] data, int startOff) throws HyracksDataException {
  this.data = data;
  startOffset = startOff;
  length = UTF8StringUtil.getStringLength(data, startOff);
  utfByteLength = UTF8StringUtil.getUTFLength(data, startOff);
  metaLength = UTF8StringUtil.getNumBytesToStoreLength(utfByteLength);
  reset();
}

  /**
   * Re-targets this object at a new serialized sentence.
   * <p>
   * Records the sentence bounds, positions {@code byteIndex} on the first payload byte (past the
   * optional type tag and the UTF-8 length prefix), and clears the token bookkeeping unless token
   * counting is disabled.
   *
   * @param sentenceData array holding the sentence
   * @param start        offset at which the sentence begins
   * @param length       number of bytes belonging to the sentence, counted from {@code start}
   */
  @Override
  public void reset(byte[] sentenceData, int start, int length) {
    sentenceBytes = sentenceData;
    sentenceStartOffset = start;
    sentenceEndOffset = start + length;

    // A leading type tag, when present, occupies exactly one byte.
    byteIndex = sourceHasTypeTag ? sentenceStartOffset + 1 : sentenceStartOffset;
    sentenceUtf8Length = UTF8StringUtil.getUTFLength(sentenceData, byteIndex);
    // Step over the length prefix so byteIndex lands on the first payload byte.
    byteIndex += UTF8StringUtil.getNumBytesToStoreLength(sentenceUtf8Length);

    if (ignoreTokenCount) {
      return;
    }
    tokensStart.reset();
    tokensLength.reset();
  }
}

/**
 * Computes the normalized key of a length-prefixed UTF-8 string.
 * <p>
 * The normalized key in Hyracks is mainly used to speed up the comparison between pointable data.
 * In the UTF8StringPTR case, the key is built from the first two chars: the first char fills the
 * upper 16 bits and the second char the lower 16 bits, with {@code 0} standing in for any char the
 * string does not have. A comparator first compares these integers ({@code <}, {@code >} or
 * {@code =}) and inspects the actual bytes only when the keys are equal, so the key must be
 * consistent with the full comparison result.
 * <p>
 * Whether another char is available is decided by the byte position, not by comparing the char index
 * with the byte length. A string made of a single multi-byte char has a byte length of 2 or 3, so an
 * index-versus-byte-length check would read a second "char" from beyond the end of the string.
 *
 * @param bytes array holding the string
 * @param start offset of the string's length prefix
 * @return a non-negative key derived from at most the first two chars
 */
public static int normalize(byte[] bytes, int start) {
  int len = getUTFLength(bytes, start);
  long nk = 0;
  int offset = start + getNumBytesToStoreLength(len);
  int end = offset + len;
  for (int i = 0; i < 2; ++i) {
    nk <<= 16;
    // Only decode while still inside the payload; missing chars contribute 0.
    if (offset < end) {
      nk += (charAt(bytes, offset)) & 0xffff;
      offset += charSize(bytes, offset);
    }
  }
  return (int) (nk >> 1); // make it always positive.
}

public static void writeUTF8StringAsJSON(byte[] b, int s, int l, OutputStream os) throws IOException {
  int utfLength = UTF8StringUtil.getUTFLength(b, s);
  int position = s + UTF8StringUtil.getNumBytesToStoreLength(utfLength); // skip 2 bytes containing string size
  int maxPosition = position + utfLength;

int utfLength = UTF8StringUtil.getUTFLength(sentenceBytes, tmpIndex);

/**
 * Walks the encoded form of {@code STRING_UTF8_MIX} and checks, for every expected char, that
 * {@code charAt} decodes it and that {@code charSize} agrees with {@code getModifiedUTF8Len}.
 */
@Test
public void testCharAtCharSizeGetLen() throws Exception {
  final char[] expectedChars = STRING_UTF8_MIX.toCharArray();
  final byte[] encoded = writeStringToBytes(STRING_UTF8_MIX);
  // Start right after the length prefix.
  int cursor = getNumBytesToStoreLength(getUTFLength(encoded, 0));
  for (int i = 0; i < expectedChars.length; i++) {
    final char expected = expectedChars[i];
    assertEquals(expected, charAt(encoded, cursor));
    final int width = charSize(encoded, cursor);
    assertEquals(getModifiedUTF8Len(expected), width);
    cursor += width;
  }
}

// Decode the UTF-8 byte length of the field name. The prefix is read one byte past nstart
// (presumably to skip a leading type tag — confirm against the enclosing method).
int fieldUtflength = UTF8StringUtil.getUTFLength(fieldName, nstart + 1);
// Number of bytes the length prefix itself occupies.
int fieldUtfMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fieldUtflength);
// Hash starting at the length prefix; the third argument is prefix size plus payload size
// (presumably the number of bytes to hash — verify against nameHashFunction's contract).
int fieldNameHashCode = nameHashFunction.hash(fieldName, nstart + 1, fieldUtflength + fieldUtfMetaLen);

  int len = UTF8StringUtil.getUTFLength(serNonTaggedAObject, offset + 1);
  return len + UTF8StringUtil.getNumBytesToStoreLength(len);
} else {
  int len = UTF8StringUtil.getUTFLength(serNonTaggedAObject, offset);
  return len + UTF8StringUtil.getNumBytesToStoreLength(len);

    .getNumBytesToStoreLength(UTF8StringUtil.getUTFLength(arg1.getByteArray(), arg1.getStartOffset()));
int startOffset = arg1.getStartOffset() + numBytesToStoreLength;
int length = arg1.getLength() - numBytesToStoreLength;

    .getNumBytesToStoreLength(UTF8StringUtil.getUTFLength(queryArray, queryArrayStartOffset));
queryArrayStartOffset = queryArrayStartOffset + numBytesToStoreLength;
queryArrayLength = queryArrayLength - numBytesToStoreLength;
      UTF8StringUtil.getUTFLength(tokenizerForRightArray.getToken().getData(),
          tokenizerForRightArray.getToken().getStartOffset()));
  tokenOffset = tokenOffset + numBytesToStoreLength;

How to use the getUTFLength method in org.apache.hyracks.util.string.UTF8StringUtil

Best Java code snippets using org.apache.hyracks.util.string.UTF8StringUtil.getUTFLength (Showing top 18 results out of 315)

How to use the getUTFLength method in org.apache.hyracks.util.string.UTF8StringUtil