org.apache.hyracks.util.string Java code examples

  /**
   * Hashes the value located at {@code offset} in {@code bytes} by delegating to
   * {@code UTF8StringUtil.hash}, passing along the {@code coefficient} and {@code r}
   * values visible from the enclosing scope.
   *
   * @param bytes  buffer holding the value to hash
   * @param offset position in {@code bytes} handed to the delegate
   * @param length not forwarded to the delegate
   * @return the value returned by {@code UTF8StringUtil.hash}
   */
  @Override
  public int hash(byte[] bytes, int offset, int length) {
    final int hashCode = UTF8StringUtil.hash(bytes, offset, coefficient, r);
    return hashCode;
  }
};

/**
 * Compares two UTF8 strings using the raw-byte mode of the shared {@code compareTo} helper
 * (lower-casing disabled, raw-byte comparison enabled).
 * As noted for this function, the resulting order may be incorrect for languages that use
 * 2- or 3-byte characters; it works for single-byte character languages.
 *
 * @param thisBytes buffer holding the first string
 * @param thisStart start offset of the first string in {@code thisBytes}
 * @param thatBytes buffer holding the second string
 * @param thatStart start offset of the second string in {@code thatBytes}
 * @return the result of the underlying {@code compareTo} call
 */
public static int rawByteCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
  final boolean useLowerCase = false;
  final boolean useRawByte = true;
  return compareTo(thisBytes, thisStart, thatBytes, thatStart, useLowerCase, useRawByte);
}

/**
 * Writes the UTF8 string <code>str</code> into the DataOutput <code>out</code>.
 * Delegates to the three-argument <code>writeUTF8</code> overload, passing <code>null</code>
 * as its third argument.
 *
 * @param str
 *            a Unicode string
 * @param out
 *            a data output stream
 * @throws IOException
 *             propagated from the delegated <code>writeUTF8</code> call
 */
public static void writeUTF8(CharSequence str, DataOutput out) throws IOException {
  // NOTE(review): the third argument is presumably an optional reusable writer; confirm against the overload.
  writeUTF8(str, out, null);
}

/**
 * Counts the characters of the UTF8 string stored in {@code b} starting at {@code s}.
 * The byte length is read with {@code getUTFLength}, the length prefix is skipped using
 * {@code getNumBytesToStoreLength}, and the remaining bytes are walked one character at a
 * time, advancing by {@code charSize} on each step.
 *
 * @param b buffer holding the string
 * @param s start offset of the string (including its length prefix) in {@code b}
 * @return number of characters visited before reaching the end of the string's bytes
 */
public static int getStringLength(byte[] b, int s) {
  final int utfLength = getUTFLength(b, s);
  final int firstCharOffset = s + getNumBytesToStoreLength(utfLength);
  final int limit = firstCharOffset + utfLength;
  int count = 0;
  // Each iteration accounts for exactly one character, whatever its byte width.
  for (int cursor = firstCharOffset; cursor < limit; cursor += charSize(b, cursor)) {
    count++;
  }
  return count;
}

/**
 * Hashes the string stored at {@code start}: reads its byte length with {@code getUTFLength},
 * skips the length prefix, and hands the remaining range to the seven-argument {@code hash}.
 *
 * @param bytes        buffer holding the string
 * @param start        start offset of the string (including its length prefix)
 * @param useLowerCase forwarded unchanged to the delegate
 * @param useRawByte   forwarded unchanged to the delegate
 * @param coefficient  forwarded unchanged to the delegate
 * @param r            forwarded unchanged to the delegate
 * @return the value returned by the delegate
 */
private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient, int r) {
  final int payloadLength = getUTFLength(bytes, start);
  final int prefixSize = getNumBytesToStoreLength(payloadLength);
  return hash(bytes, start + prefixSize, payloadLength, useLowerCase, useRawByte, coefficient, r);
}

/**
 * Compares two stored strings. For each side the byte length is read with
 * {@code getUTFLength} and the length prefix is skipped via {@code getNumBytesToStoreLength};
 * the resulting ranges are passed to the eight-argument {@code compareTo}.
 *
 * @param thisBytes    buffer holding the first string
 * @param thisStart    start offset of the first string (including its length prefix)
 * @param thatBytes    buffer holding the second string
 * @param thatStart    start offset of the second string (including its length prefix)
 * @param useLowerCase forwarded unchanged to the delegate
 * @param useRawByte   forwarded unchanged to the delegate
 * @return the value returned by the delegate
 */
private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart, boolean useLowerCase,
    boolean useRawByte) {
  final int leftLength = getUTFLength(thisBytes, thisStart);
  final int rightLength = getUTFLength(thatBytes, thatStart);
  final int leftPayloadStart = thisStart + getNumBytesToStoreLength(leftLength);
  final int rightPayloadStart = thatStart + getNumBytesToStoreLength(rightLength);
  return compareTo(thisBytes, leftPayloadStart, leftLength, thatBytes, rightPayloadStart, rightLength,
      useLowerCase, useRawByte);
}

/**
 * Reads the character located {@code offset} bytes after {@code start} in the backing buffer.
 * No validation is performed here: the caller must ensure the offset is within bounds and
 * points to the beginning of a valid UTF8 character.
 *
 * @param offset
 *            - Byte offset, relative to {@code start}
 * @return Character at the given offset.
 */
public char charAt(int offset) {
  final int absoluteOffset = start + offset;
  return UTF8StringUtil.charAt(bytes, absoluteOffset);
}

/**
 * Moves {@code pos} forward by the size that {@code UTF8StringUtil.charSize} reports for
 * the character starting at the current {@code pos} in {@code data}.
 *
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public void next() throws HyracksDataException {
  final int width = UTF8StringUtil.charSize(data, pos);
  pos += width;
}

/**
 * Gets the length of the string in characters.
 * The value is computed through {@code UTF8StringUtil.getStringLength} while the cached
 * {@code stringLength} is negative; once a non-negative value is stored, later calls
 * return the pre-calculated result.
 *
 * @return length of string in characters
 */
public int getStringLength() {
  if (stringLength >= 0) {
    return stringLength;
  }
  stringLength = UTF8StringUtil.getStringLength(bytes, start);
  return stringLength;
}

/**
 * Computes the hash by delegating to {@code UTF8StringUtil.lowerCaseHash} over the
 * {@code bytes}, {@code start} and {@code length} of this object.
 *
 * @return the value returned by {@code UTF8StringUtil.lowerCaseHash}
 */
@Override
public int hash() {
  final int hashCode = UTF8StringUtil.lowerCaseHash(bytes, start, length);
  return hashCode;
}

/**
 * Compares this object's string (left-hand side) with the string at {@code start} in
 * {@code bytes} (right-hand side) using {@code UTF8StringUtil.rawByteCompareTo}.
 *
 * @param bytes  buffer holding the other string
 * @param start  start offset of the other string in {@code bytes}
 * @param length not forwarded to the delegate
 * @return the value returned by {@code UTF8StringUtil.rawByteCompareTo}
 */
@Override
public int compareTo(byte[] bytes, int start, int length) {
  final int order = UTF8StringUtil.rawByteCompareTo(this.bytes, this.start, bytes, start);
  return order;
}

/**
 * Stores the result of {@code UTF8StringUtil.normalize(bytes, start)} into the single
 * slot {@code normalizedKeys[keyStart]}.
 *
 * @param bytes          buffer holding the value to normalize
 * @param start          start offset of the value in {@code bytes}
 * @param length         not forwarded to the delegate
 * @param normalizedKeys destination array; only index {@code keyStart} is written
 * @param keyStart       index in {@code normalizedKeys} that receives the key
 */
@Override
public void normalize(byte[] bytes, int start, int length, int[] normalizedKeys, int keyStart) {
  final int key = UTF8StringUtil.normalize(bytes, start);
  normalizedKeys[keyStart] = key;
}

/**
 * Prints the string described by {@code b}, {@code s} and {@code l} to {@code os} by
 * delegating to {@code printUTF8String} with its last argument set to {@code true}.
 *
 * @param b  buffer holding the string
 * @param s  start offset passed through to {@code printUTF8String}
 * @param l  length passed through to {@code printUTF8String}
 * @param os destination stream
 * @throws IOException propagated from {@code printUTF8String}
 */
public static void printUTF8StringWithQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
  // NOTE(review): the trailing `true` presumably enables quoting, per this method's name; confirm in printUTF8String.
  printUTF8String(b, s, l, os, true);
}

/**
 * Computes the hash of this object's string by delegating to
 * {@code UTF8StringUtil.rawBytehash} with this object's {@code bytes} and {@code start}.
 *
 * @return the value returned by {@code UTF8StringUtil.rawBytehash}
 */
@Override
public int hash() {
  final int hashCode = UTF8StringUtil.rawBytehash(this.bytes, this.start);
  return hashCode;
}

/**
 * Returns the hash of this object's string, caching it in {@code hashValue}.
 * A stored value of {@code 0} is treated as "not yet computed", so the hash is
 * recomputed on every call for which {@code UTF8StringUtil.hash} itself yields {@code 0}.
 *
 * @return the cached or freshly computed hash
 */
@Override
public int hash() {
  if (hashValue != 0) {
    return hashValue;
  }
  hashValue = UTF8StringUtil.hash(this.bytes, this.start);
  return hashValue;
}

/**
 * Compares this object's string (left-hand side) with the string at {@code start} in
 * {@code bytes} (right-hand side) using {@code UTF8StringUtil.compareTo}.
 *
 * @param bytes  buffer holding the other string
 * @param start  start offset of the other string in {@code bytes}
 * @param length not forwarded to the delegate
 * @return the value returned by {@code UTF8StringUtil.compareTo}
 */
@Override
public int compareTo(byte[] bytes, int start, int length) {
  final int order = UTF8StringUtil.compareTo(this.bytes, this.start, bytes, start);
  return order;
}

/**
 * Writes {@code str} to {@code out} by delegating to the three-argument
 * {@code UTF8StringUtil.writeUTF8}, passing this object as the third argument.
 *
 * @param str the character sequence to write
 * @param out the destination data output
 * @throws IOException propagated from {@code UTF8StringUtil.writeUTF8}
 */
public final void writeUTF8(CharSequence str, DataOutput out) throws IOException {
  UTF8StringUtil.writeUTF8(str, out, this);
}

/**
 * Computes the hash of this object's string by delegating to the two-argument
 * {@code UTF8StringUtil.lowerCaseHash} with this object's {@code bytes} and {@code start}.
 *
 * @return the value returned by {@code UTF8StringUtil.lowerCaseHash}
 */
@Override
public int hash() {
  final int hashCode = UTF8StringUtil.lowerCaseHash(bytes, start);
  return hashCode;
}

/**
 * Prints {@code str} to {@code os} by delegating to {@code printUTF8String} with its
 * last argument set to {@code true}.
 *
 * @param str the string to print
 * @param os  destination stream
 * @throws IOException propagated from {@code printUTF8String}
 */
public static void printUTF8StringWithQuotes(String str, OutputStream os) throws IOException {
  // NOTE(review): the trailing `true` presumably enables quoting, per this method's name; confirm in printUTF8String.
  printUTF8String(str, os, true);
}

/**
 * Provides the raw bytes-based hash function for UTF8 strings: the shared {@code hash}
 * helper is invoked with lower-casing disabled, raw-byte mode enabled, a coefficient of
 * {@code 31} and {@code Integer.MAX_VALUE} as the final argument.
 * NOTE(review): the previous comment warned about incorrect "ordering" for languages with
 * 2- or 3-byte characters; that wording appears to be carried over from
 * {@code rawByteCompareTo} and should be confirmed for a hash function.
 *
 * @param bytes buffer holding the string
 * @param start start offset of the string in {@code bytes}
 * @return the value returned by the underlying {@code hash} call
 */
public static int rawBytehash(byte[] bytes, int start) {
  final boolean useLowerCase = false;
  final boolean useRawByte = true;
  return hash(bytes, start, useLowerCase, useRawByte, 31, Integer.MAX_VALUE);
}

How to use org.apache.hyracks.util.string

Best Java code snippets using org.apache.hyracks.util.string (Showing top 20 results out of 315)