// Pass-through item hash: hashes the item's raw serialized bytes as-is via the
// generic binary hash function (no tag handling, no case folding).
@Override public int hash(byte[] bytes, int offset, int length) throws HyracksDataException { return genericBinaryHash.hash(bytes, offset, length); } };
// Hashes one list item so that equal values hash equally whether the item type is
// declared or carried as a runtime tag:
// - If the declared item type is ANY, the actual tag is read from the first byte and
//   skipped (skip = 1) for the tag-sensitive cases below.
// - STRING with ignoreCase=true is hashed through the lower-casing string hash over the
//   untagged bytes. NOTE(review): when ignoreCase is false the STRING case intentionally
//   falls through to default — do not "fix" the missing break.
// - default with a concrete (non-ANY) declared type: the tag byte is re-prepended via
//   resetTaggedBytes so the hash matches the same value serialized in tagged form
//   (hence the length + 1). IOExceptions from the tagging buffer are rewrapped.
// - default with ANY: the bytes already carry their tag, hash them unchanged.
@Override public int hash(byte[] bytes, int offset, int length) throws HyracksDataException { ATypeTag tag = itemTypeTag; int skip = 0; if (itemTypeTag == ATypeTag.ANY) { tag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]); skip = 1; } switch (tag) { case STRING: { if (ignoreCase) { return lowerCaseStringHash.hash(bytes, offset + skip, length - skip); } } default: { if (itemTypeTag != ATypeTag.ANY) { // add the itemTypeTag in front of the data try { resetTaggedBytes(bytes, offset, length); return genericBinaryHash.hash(taggedBytes.getByteArray(), 0, length + 1); } catch (IOException e) { throw HyracksDataException.create(e); } } else { return genericBinaryHash.hash(bytes, offset, length); } } } }
// Probe for a field by name: hash the name, then binary-search hashCodeIndexPairs,
// whose entries pack (hash << 32 | fieldIndex) and are sorted — searching for
// (hash << 32) lands at or before the first entry carrying this hash.
// A negative result from Arrays.binarySearch encodes (-(insertionPoint) - 1);
// -1 * (i + 1) recovers the insertion point so scanning can start there.
int probeFieldHash = fieldNameHashFunction.hash(bytes, start, length); int i = Arrays.binarySearch(hashCodeIndexPairs, ((long) probeFieldHash) << 32); i = (i < 0) ? -1 * (i + 1) : i;
/**
 * Maps a tuple to one of {@code nParts} partitions by summing the per-field hashes
 * of the configured hash fields over the tuple's serialized bytes.
 *
 * @param accessor frame accessor positioned on the frame holding the tuple
 * @param tIndex   index of the tuple within the frame
 * @param nParts   number of target partitions (must be > 0)
 * @return a partition number in [0, nParts)
 * @throws HyracksDataException if a field hash function fails
 */
@Override
public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) throws HyracksDataException {
    // Single-partition short-circuit: the original always reduced to 0 when nParts == 1
    // (h % 1 == 0 after the negative normalization), so skipping the hashing work is
    // behavior-identical and matches the sibling partitioner that already does this.
    if (nParts == 1) {
        return 0;
    }
    int h = 0;
    int startOffset = accessor.getTupleStartOffset(tIndex);
    int slotLength = accessor.getFieldSlotsLength();
    for (int j = 0; j < hashFields.length; ++j) {
        int fIdx = hashFields[j];
        IBinaryHashFunction hashFn = hashFunctions[j];
        int fStart = accessor.getFieldStartOffset(tIndex, fIdx);
        int fEnd = accessor.getFieldEndOffset(tIndex, fIdx);
        int fh = hashFn.hash(accessor.getBuffer().array(), startOffset + slotLength + fStart, fEnd - fStart);
        // NOTE: additive combine is kept as-is — partitioning functions must stay stable
        // so that all operators computing this partitioning agree.
        h += fh;
    }
    // Normalize a negative accumulated hash to a non-negative value before the modulus
    // (-(MIN_VALUE + 1) == MAX_VALUE, so this is overflow-safe).
    if (h < 0) {
        h = -(h + 1);
    }
    return h % nParts;
} };
/**
 * Maps a tuple to one of {@code nParts} partitions using a 31-based polynomial
 * combination of the per-field hashes of the configured hash fields.
 *
 * @param accessor frame accessor positioned on the frame holding the tuple
 * @param tIndex   index of the tuple within the frame
 * @param nParts   number of target partitions (must be > 0)
 * @return a partition number in [0, nParts)
 * @throws HyracksDataException if a field hash function fails
 */
@Override
public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) throws HyracksDataException {
    // With a single target partition every tuple trivially maps to 0.
    if (nParts == 1) {
        return 0;
    }
    final int tupleStart = accessor.getTupleStartOffset(tIndex);
    final int fieldSlots = accessor.getFieldSlotsLength();
    int combined = 0;
    for (int i = 0; i < hashFields.length; i++) {
        final int field = hashFields[i];
        final int begin = accessor.getFieldStartOffset(tIndex, field);
        final int end = accessor.getFieldEndOffset(tIndex, field);
        final int fieldHash =
                hashFunctions[i].hash(accessor.getBuffer().array(), tupleStart + fieldSlots + begin, end - begin);
        // Polynomial (multiply-by-31) accumulation of the per-field hashes.
        combined = combined * 31 + fieldHash;
    }
    // Fold a negative accumulator into the non-negative range before the modulus.
    if (combined < 0) {
        combined = -(combined + 1);
    }
    return combined % nParts;
} };
private BinaryEntry getPutInternal(BinaryEntry key, BinaryEntry value, boolean put) throws HyracksDataException { int bucket; if (put) { bucket = Math.abs(putHashFunc.hash(key.getBuf(), key.getOffset(), key.getLength()) % listHeads.length); } else { bucket = Math.abs(getHashFunc.hash(key.getBuf(), key.getOffset(), key.getLength()) % listHeads.length);
private int putFindInternal(BinaryEntry key, boolean isInsert, byte[] keyArray, boolean increaseFoundCount) throws HyracksDataException { int bucket; bucket = isInsert ? Math.abs(hashFunc.hash(this.refArray, key.getOffset(), key.getLength()) % listHeads.length) : Math.abs(hashFunc.hash(keyArray, key.getOffset(), key.getLength()) % listHeads.length);
@Override protected boolean processItem(IPointable item, int listIndex, IAsterixListBuilder listBuilder) throws HyracksDataException { int hash = binaryHashFunction.hash(item.getByteArray(), item.getStartOffset(), item.getLength()); List<IPointable> sameHashes = hashes.get(hash); if (sameHashes == null) { // new item sameHashes = pointableListAllocator.allocate(null); sameHashes.clear(); addItem(listBuilder, item, sameHashes); hashes.put(hash, sameHashes); return true; } else if (ArrayFunctionsUtil.findItem(item, sameHashes, comp) == null) { // new item, it could happen that two hashes are the same but they are for different items addItem(listBuilder, item, sameHashes); return true; } // else ignore since the item already exists return false; }
// Pack (field-name hash, field index) into one long per field: the hash occupies the
// upper 32 bits and the index the lower 32. Sorting these longs orders entries by hash
// first, which enables the binary-search probe used when looking fields up by name.
length = baaos.size() - serializedFieldNameOffsets[i]; hashCodeIndexPairs[i] = fieldNameHashFunction.hash(baaos.getByteArray(), serializedFieldNameOffsets[i], length); hashCodeIndexPairs[i] = hashCodeIndexPairs[i] << 32; hashCodeIndexPairs[i] = hashCodeIndexPairs[i] | i;
@Override protected boolean processItem(IPointable item, int listIndex, IAsterixListBuilder listBuilder) throws HyracksDataException { // lookup the item int hash = binaryHashFunction.hash(item.getByteArray(), item.getStartOffset(), item.getLength()); List<ValueCounter> sameHashes = hashes.get(hash); if (sameHashes == null) { // new item sameHashes = arrayListAllocator.allocate(null); sameHashes.clear(); addItem(item, listIndex, sameHashes); hashes.put(hash, sameHashes); return true; } else { // potentially, item already exists ValueCounter itemListIdxCounter = ArrayFunctionsUtil.findItem(item, sameHashes, comp); if (itemListIdxCounter == null) { // new item addItem(item, listIndex, sameHashes); return true; } // the item already exists, increment the counter (don't increment the counter for the same listIndex) if (itemListIdxCounter.listIndex != listIndex) { itemListIdxCounter.listIndex = listIndex; itemListIdxCounter.counter++; } // false, since we didn't add (use) the item return false; } }
// Appends an open (schema-less) field to the record under construction.
// - A MISSING-tagged value is silently dropped: for open content, MISSING means
//   "field absent".
// - The parallel offset/length arrays grow on demand in DEFAULT_NUM_OPEN_FIELDS steps.
// - The field name is hashed and matched against closed fields WITHOUT its type-tag
//   byte (start + 1, length - 1); clashing with a closed field of the enclosing record
//   type is an error.
// - Each openPartOffsets entry packs (nameHash << 32) plus the field's byte offset in
//   openPartOutputStream; after the shift the low 32 bits are zero, so '+=' here acts
//   as a bitwise OR of the offset into the low half (assumes the open part stays
//   under 2^32 bytes).
// - Finally the untagged name bytes and the fully tagged value bytes are appended to
//   the open-part stream.
@Override public void addField(IValueReference name, IValueReference value) throws HyracksDataException { byte[] data = value.getByteArray(); int offset = value.getStartOffset(); // MISSING for an open field means the field does not exist. if (data[offset] == ATypeTag.SERIALIZED_MISSING_TYPE_TAG) { return; } if (numberOfOpenFields == openPartOffsets.length) { openPartOffsets = Arrays.copyOf(openPartOffsets, openPartOffsets.length + DEFAULT_NUM_OPEN_FIELDS); openFieldNameLengths = Arrays.copyOf(openFieldNameLengths, openFieldNameLengths.length + DEFAULT_NUM_OPEN_FIELDS); } int fieldNameHashCode = utf8HashFunction.hash(name.getByteArray(), name.getStartOffset() + 1, name.getLength() - 1); if (recType != null) { int cFieldPos; cFieldPos = recTypeInfo.getFieldIndex(name.getByteArray(), name.getStartOffset() + 1, name.getLength() - 1); if (cFieldPos >= 0) { throw new HyracksDataException("Open field \"" + recType.getFieldNames()[cFieldPos] + "\" has the same field name as closed field at index " + cFieldPos); } } openPartOffsets[this.numberOfOpenFields] = fieldNameHashCode; openPartOffsets[this.numberOfOpenFields] = openPartOffsets[numberOfOpenFields] << 32; openPartOffsets[numberOfOpenFields] += openPartOutputStream.size(); openFieldNameLengths[numberOfOpenFields++] = name.getLength() - 1; openPartOutputStream.write(name.getByteArray(), name.getStartOffset() + 1, name.getLength() - 1); openPartOutputStream.write(value.getByteArray(), value.getStartOffset(), value.getLength()); }
// Hash a serialized field name: skip the type-tag byte (nstart + 1), then hash the
// UTF-8 length header plus the string payload (fieldUtfMetaLen + fieldUtflength bytes).
// NOTE(review): assumes the layout at nstart is [tag][len-header][utf8 bytes] — confirm
// against the writer that produced fieldName.
int fieldUtflength = UTF8StringUtil.getUTFLength(fieldName, nstart + 1); int fieldUtfMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fieldUtflength); int fieldNameHashCode = nameHashFunction.hash(fieldName, nstart + 1, fieldUtflength + fieldUtfMetaLen);
// Hash the item's serialized bytes and fetch the bucket of values sharing that hash.
// Fix: the original performed hashes.get(hash) twice in a row, discarding the first
// result — a plain map lookup is side-effect free, so a single call suffices.
hash = binaryHashFunction.hash(item.getByteArray(), item.getStartOffset(), item.getLength()); sameHashes = hashes.get(hash);
// Scans one input list, populating the hash table of seen values and, when
// initIntersectList is true, the tentative intersection result.
// - Derived-type (complex) items are rejected: they cannot be compared here.
// - NULL/MISSING items are skipped via the notNullAndMissing guard.
// - getOrWriteItem may materialize the item's bytes into 'storage' (itemInStorage);
//   when the item is retained (initIntersectList(...) returned true), a fresh
//   pointable — and, if the item lived in storage, a fresh storage buffer — is
//   allocated so the retained reference is not clobbered by the next iteration.
// - Otherwise the item only updates the common-value bookkeeping for this listIndex.
private void processList(ListAccessor listAccessor, int listIndex, IAsterixListBuilder listBuilder, boolean initIntersectList) throws IOException { int hash; List<ValueListIndex> sameHashes; boolean itemInStorage; IPointable item = pointableAllocator.allocateEmpty(); ArrayBackedValueStorage storage = (ArrayBackedValueStorage) storageAllocator.allocate(null); storage.reset(); for (int j = 0; j < listAccessor.size(); j++) { itemInStorage = listAccessor.getOrWriteItem(j, item, storage); if (ATYPETAGDESERIALIZER.deserialize(item.getByteArray()[item.getStartOffset()]).isDerivedType()) { throw new RuntimeDataException(ErrorCode.CANNOT_COMPARE_COMPLEX, sourceLoc); } if (notNullAndMissing(item)) { // look up to see if item exists hash = binaryHashFunction.hash(item.getByteArray(), item.getStartOffset(), item.getLength()); sameHashes = hashes.get(hash); if (initIntersectList && initIntersectList(item, hash, sameHashes)) { // item is used item = pointableAllocator.allocateEmpty(); if (itemInStorage) { storage = (ArrayBackedValueStorage) storageAllocator.allocate(null); storage.reset(); } } else { incrementCommonValue(item, sameHashes, listIndex, listBuilder); } } } }