/**
 * Hashes the UTF-8 string stored in {@code bytes} starting at {@code offset}.
 *
 * Delegates to {@link UTF8StringUtil#hash(byte[], int, int, int)} using the
 * {@code coefficient} and {@code r} values captured from the enclosing scope
 * (their definitions are outside this snippet).
 *
 * @param bytes  the buffer holding the string
 * @param offset the position in {@code bytes} that is handed to UTF8StringUtil.hash
 * @param length not used by this implementation; it is not forwarded to the delegate
 * @return the value returned by UTF8StringUtil.hash
 */
@Override
public int hash(byte[] bytes, int offset, int length) {
    // Only bytes/offset are forwarded; the length argument is ignored here.
    return UTF8StringUtil.hash(bytes, offset, coefficient, r);
}
// Closes the enclosing anonymous class, whose header lies above this snippet.
};
/**
 * Returns the hash of the string this object points at, computing it on first use.
 *
 * The result of {@code UTF8StringUtil.hash(this.bytes, this.start)} is stored in
 * {@code hashValue}. A stored value of {@code 0} is treated as "not computed yet",
 * so a string whose hash is actually 0 is re-hashed on every call.
 *
 * @return the cached or freshly computed hash value
 */
@Override
public int hash() {
    // Fast path: a non-zero value means the hash was already computed.
    if (hashValue != 0) {
        return hashValue;
    }
    hashValue = UTF8StringUtil.hash(this.bytes, this.start);
    return hashValue;
}
/**
 * Raw-bytes-based hash function for UTF-8 strings.
 *
 * Note: the resulting hash values may not deliver the correct ordering for
 * languages that include 2- or 3-byte characters; it works for single-byte
 * character languages.
 *
 * Uses the raw-byte mode without lower-casing, with coefficient 31 and
 * r = {@code Integer.MAX_VALUE}.
 *
 * @param bytes the buffer holding the string
 * @param start the position in {@code bytes} where the string begins
 * @return the hash value
 */
public static int rawBytehash(byte[] bytes, int start) {
    final boolean useLowerCase = false;
    final boolean useRawByte = true;
    return hash(bytes, start, useLowerCase, useRawByte, 31, Integer.MAX_VALUE);
}
/**
 * Hashes the string at {@code start} with a caller-supplied coefficient and {@code r}.
 *
 * Neither lower-casing nor raw-byte mode is enabled.
 *
 * @param bytes       the buffer holding the string
 * @param start       the position in {@code bytes} where the string begins
 * @param coefficient the coefficient forwarded to the underlying hash routine
 * @param r           the {@code r} value forwarded to the underlying hash routine
 * @return the hash value
 */
public static int hash(byte[] bytes, int start, int coefficient, int r) {
    final boolean useLowerCase = false;
    final boolean useRawByte = false;
    return hash(bytes, start, useLowerCase, useRawByte, coefficient, r);
}
/**
 * Hashes the string at {@code start} using coefficient 31 and
 * r = {@code Integer.MAX_VALUE}, with lower-casing and raw-byte mode both off.
 *
 * @param bytes the buffer holding the string
 * @param start the position in {@code bytes} where the string begins
 * @return the hash value
 */
public static int hash(byte[] bytes, int start) {
    final boolean useLowerCase = false;
    final boolean useRawByte = false;
    return hash(bytes, start, useLowerCase, useRawByte, 31, Integer.MAX_VALUE);
}
/**
 * Hashes the string at {@code start} with the lower-case option enabled,
 * using coefficient 31 and r = {@code Integer.MAX_VALUE}.
 *
 * @param bytes the buffer holding the string
 * @param start the position in {@code bytes} where the string begins
 * @return the hash value
 */
public static int lowerCaseHash(byte[] bytes, int start) {
    final boolean useLowerCase = true;
    final boolean useRawByte = false;
    return hash(bytes, start, useLowerCase, useRawByte, 31, Integer.MAX_VALUE);
}
/**
 * Hashes with the lower-case option enabled, passing {@code start} and
 * {@code length} straight through to the seven-argument hash routine.
 *
 * Uses coefficient 31 and r = {@code Integer.MAX_VALUE}; raw-byte mode is off.
 *
 * @param bytes  the buffer holding the string data
 * @param start  the start position forwarded to the underlying routine
 * @param length the length forwarded to the underlying routine
 * @return the hash value
 */
public static int lowerCaseHash(byte[] bytes, int start, int length) {
    final boolean useLowerCase = true;
    final boolean useRawByte = false;
    return hash(bytes, start, length, useLowerCase, useRawByte, 31, Integer.MAX_VALUE);
}
/**
 * Resolves where the string content begins and how long it is, then delegates
 * to the seven-argument hash routine.
 *
 * The length is obtained from {@code getUTFLength(bytes, start)}, and the content
 * start is {@code start} advanced by {@code getNumBytesToStoreLength(length)}.
 *
 * @param bytes        the buffer holding the string
 * @param start        the position in {@code bytes} where the string begins
 * @param useLowerCase forwarded unchanged to the underlying routine
 * @param useRawByte   forwarded unchanged to the underlying routine
 * @param coefficient  forwarded unchanged to the underlying routine
 * @param r            forwarded unchanged to the underlying routine
 * @return the hash value
 */
private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient,
        int r) {
    final int utfLength = getUTFLength(bytes, start);
    final int lengthFieldSize = getNumBytesToStoreLength(utfLength);
    final int contentStart = start + lengthFieldSize;
    return hash(bytes, contentStart, utfLength, useLowerCase, useRawByte, coefficient, r);
}
/**
 * Checks that {@code hash} and {@code lowerCaseHash} agree on the lower-case
 * sample string, and that two different coefficients produce different hashes.
 */
@Test
public void testHash() throws IOException {
    byte[] buffer = writeStringToBytes(STRING_UTF8_MIX_LOWERCASE);
    final int plainHash = hash(buffer, 0);

    // NOTE(review): this buffer is built from the same lower-case constant as the
    // one above, so the comparison only shows lowerCaseHash matches hash on input
    // that is already lower-case. A mixed-case sample would exercise case folding;
    // confirm whether that was the intent.
    buffer = writeStringToBytes(STRING_UTF8_MIX_LOWERCASE);
    final int caseFoldedHash = lowerCaseHash(buffer, 0);
    assertEquals(plainHash, caseFoldedHash);

    // Same input and r, different coefficients: the hashes are expected to differ.
    final int hashWithCoefficient7 = hash(buffer, 0, 7, 297);
    final int hashWithCoefficient8 = hash(buffer, 0, 8, 297);
    assertTrue(hashWithCoefficient7 != hashWithCoefficient8);
}