/**
 * Reads an integer from the given buffer.
 *
 * @param in buffer passed to {@code BytesUtil.readUnsigned} with a size argument of 4
 * @return the value produced by {@code BytesUtil.readUnsigned}, boxed as an {@code Integer}
 */
@Override
public Integer deserialize(ByteBuffer in) {
    final int decoded = BytesUtil.readUnsigned(in, 4);
    return decoded;
}
/** the reverse of dictIdToString(), returns integer ID */ public static int stringToDictId(String str) { try { byte[] bytes = str.getBytes("ISO-8859-1"); return BytesUtil.readUnsigned(bytes, 0, bytes.length); } catch (UnsupportedEncodingException e) { // never happen return 0; } }
/**
 * Derives the partition number from the first byte of the key's text.
 *
 * @param skey key whose text bytes are inspected
 * @param value not used
 * @param numReduceTasks not used
 * @return the first byte of the key text, read through {@code BytesUtil.readUnsigned}
 */
@Override
public int getPartition(SelfDefineSortableKey skey, NullWritable value, int numReduceTasks) {
    final byte[] keyBytes = skey.getText().getBytes();
    return BytesUtil.readUnsigned(keyBytes, 0, 1);
}
}
/**
 * Decodes a dictionary ID stored in a byte range back into its dictionary value.
 *
 * @param bytes array holding the encoded ID
 * @param offset start position of the ID within {@code bytes}
 * @param len length argument passed to {@code BytesUtil.readUnsigned} for the ID
 * @return the value the dictionary returns for the ID, or an empty string when the
 *         dictionary lookup throws {@code IllegalArgumentException}
 */
@Override
public String decode(byte[] bytes, int offset, int len) {
    int id = BytesUtil.readUnsigned(bytes, offset, len);
    try {
        return dict.getValueFromId(id);
    } catch (IllegalArgumentException e) {
        // Best-effort decode: keep returning "" but log the exception itself so the
        // reason the lookup was rejected is not lost.
        logger.error("Can't get dictionary value from " + dict + " (id = " + id + ")", e);
        return "";
    }
}
/**
 * Chooses the partition for a key based on the first byte of the key's text.
 *
 * <p>When the first byte equals {@code FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER},
 * a long is read from the key starting at index 1 and the partition is obtained from
 * {@code reducerMapping.getReducerIdForCuboidRowCount}. Otherwise the first byte itself,
 * read through {@code BytesUtil.readUnsigned}, is the partition.
 *
 * @param skey key whose text bytes are inspected
 * @param value not used
 * @param numReduceTasks not used
 * @return the partition number
 */
@Override
public int getPartition(SelfDefineSortableKey skey, Text value, int numReduceTasks) {
    final byte[] keyBytes = skey.getText().getBytes();
    if (keyBytes[0] != FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER) {
        return BytesUtil.readUnsigned(keyBytes, 0, 1);
    }
    // Primitive long: no need to box the cuboid ID just to pass it on.
    final long cuboidId = Bytes.toLong(keyBytes, 1, Bytes.SIZEOF_LONG);
    return reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
}
/**
 * Collects value bytes by walking the trie from the node at {@code headSize}.
 *
 * <p>At each node the value part (its length is the single byte just before the value)
 * is appended to the result. The walk stops at a node flagged {@code BIT_IS_END_OF_VALUE},
 * or when the masked child offset read from the node is zero; otherwise it moves to the
 * node at {@code headSize + childOffset}.
 *
 * @return the concatenated value bytes of the visited nodes
 */
public byte[] getFirstValue() {
    final ByteArrayOutputStream collected = new ByteArrayOutputStream();
    int node = headSize;
    boolean descend;
    do {
        final int partStart = node + firstByteOffset;
        final int partLen = BytesUtil.readUnsigned(trieBytes, partStart - 1, 1);
        collected.write(trieBytes, partStart, partLen);

        descend = false;
        if (!checkFlag(node, BIT_IS_END_OF_VALUE)) {
            final long childOffset = BytesUtil.readLong(trieBytes, node, sizeChildOffset) & childOffsetMask;
            node = headSize + (int) childOffset;
            // a zero child offset lands back on headSize: nothing further to follow
            descend = node != headSize;
        }
    } while (descend);
    return collected.toByteArray();
}
/**
 * Reads a dictionary ID from the buffer and resolves it through the dictionary.
 *
 * @param in buffer passed to {@code BytesUtil.readUnsigned}, using {@code dict.getSizeOfId()} as the size
 * @return whatever {@code dict.getValueFromId} returns for the ID that was read
 */
@Override
public Object deserialize(ByteBuffer in) {
    final int idWidth = dict.getSizeOfId();
    final int dictId = BytesUtil.readUnsigned(in, idWidth);
    return dict.getValueFromId(dictId);
}
int len = BytesUtil.readUnsigned(trieBytes, p - 1, 1); System.arraycopy(trieBytes, p, returnValue, o, len); o += len; int nValuesBeneath; while (true) { nValuesBeneath = BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath); if (seq - nValuesBeneath < 0) { // value is under this child, reset n and loop again n = c; return -1; // no more child? corrupted dictionary! p = c + firstByteOffset; c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1);
setConverterByName(converterName); this.nValues = BytesUtil.readUnsigned(trieBytes, headSize + sizeChildOffset, sizeNoValuesBeneath); this.sizeOfId = BytesUtil.sizeForValue(baseId + nValues + 1L); // note baseId could raise 1 byte in ID space, +1 to reserve all 0xFF for NULL case this.childOffsetMask = ~((long) (BIT_IS_LAST_CHILD | BIT_IS_END_OF_VALUE) << ((sizeChildOffset - 1) * 8));
int parLen = BytesUtil.readUnsigned(trieBytes, p - 1, 1); boolean isEndOfValue = checkFlag(offset, BIT_IS_END_OF_VALUE); BytesUtil.readUnsigned(trieBytes, p + parLen, sizeOfId);
/**
 * Chooses the partition for a key based on the first byte of the key's text,
 * running {@code init()} first if it has not been run yet.
 *
 * <p>When the first byte equals {@code FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER},
 * a long is read from the key starting at index 1 and the partition is obtained from
 * {@code reducerMapping.getReducerIdForCuboidRowCount}. Otherwise the first byte itself,
 * read through {@code BytesUtil.readUnsigned}, is the partition.
 *
 * @param o the key; cast to {@code SelfDefineSortableKey}
 * @return the partition number
 */
@Override
public int getPartition(Object o) {
    // Check, lock on SparkFactDistinct.class, check again, then initialize.
    // NOTE(review): the unsynchronized first check is only safe if `initialized` is
    // declared volatile - its declaration is not visible here; confirm.
    if (!initialized) {
        synchronized (SparkFactDistinct.class) {
            if (!initialized) {
                init();
            }
        }
    }

    final byte[] keyBytes = ((SelfDefineSortableKey) o).getText().getBytes();
    if (keyBytes[0] != FactDistinctColumnsReducerMapping.MARK_FOR_HLL_COUNTER) {
        return BytesUtil.readUnsigned(keyBytes, 0, 1);
    }
    // Primitive long: no need to box the cuboid ID just to pass it on.
    final long cuboidId = Bytes.toLong(keyBytes, 1, Bytes.SIZEOF_LONG);
    return reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
}
}
/**
 * Fills the literal column of {@code tuple} with the next raw value, decoded through
 * {@code rawColDict}.
 *
 * @param tuple tuple that receives the decoded value at {@code literalTupleIdx}
 * @param row row number; must equal the current {@code expectRow}
 * @throws IllegalStateException if {@code row} differs from {@code expectRow}
 */
@Override
public void fillTuple(Tuple tuple, int row) {
    // expectRow is advanced on every call, including the one that throws
    if (expectRow++ != row) {
        throw new IllegalStateException();
    }

    final ByteArray encoded = rawIterator.next();
    final int dictId = BytesUtil.readUnsigned(encoded.array(), encoded.offset(), encoded.length());
    tuple.setDimensionValue(literalTupleIdx, rawColDict.getValueFromId(dictId));
}
};
int len = BytesUtil.readUnsigned(trieBytes, p - 1, 1); System.arraycopy(trieBytes, p, returnValue, o, len); o += len; c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1);
int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of node's value for (; p < end && o < inpEnd; p++, o++) { // note matching start from [0] if (trieBytes[p] != inp[o]) { return p == end && isEndOfValue ? BytesUtil.readUnsigned(trieBytes, end, sizeOfId) : -1; if (checkFlag(c, BIT_IS_LAST_CHILD)) return -1; c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1) + (checkFlag(c, BIT_IS_END_OF_VALUE) ? sizeOfId : 0); } else { // children are ordered by their first value byte
/**
 * Rebuilds {@code AppendDictNode} objects from the serialized trie, starting at offset
 * {@code n} and continuing through the following sibling nodes until one is flagged
 * {@code BIT_IS_LAST_CHILD}. Recurses into a node's children when its masked child
 * offset is non-zero.
 *
 * @param n offset in {@code trieBytes} of the first node to read
 * @param parent node that receives each rebuilt node as a child, or {@code null}
 * @return the last node built at this level when {@code parent} is {@code null};
 *         {@code null} otherwise
 */
private AppendDictNode rebuildTrieTreeR(int n, AppendDictNode parent) {
    AppendDictNode root = null;
    boolean lastSibling;
    do {
        final int valueStart = n + firstByteOffset;
        final int childOffset = (int) (BytesUtil.readLong(trieBytes, n, sizeChildOffset) & childOffsetMask);
        // the single byte just before the value holds the value length
        final int valueLen = BytesUtil.readUnsigned(trieBytes, valueStart - 1, 1);
        final boolean endsValue = checkFlag(n, BIT_IS_END_OF_VALUE);

        final byte[] nodeValue = new byte[valueLen];
        System.arraycopy(trieBytes, valueStart, nodeValue, 0, valueLen);

        final AppendDictNode node = new AppendDictNode(nodeValue, endsValue);
        if (endsValue) {
            // the ID is stored right after the value bytes
            node.id = BytesUtil.readUnsigned(trieBytes, valueStart + valueLen, sizeOfId);
        }

        if (parent != null) {
            parent.addChild(node);
        } else {
            root = node;
        }

        if (childOffset != 0) {
            rebuildTrieTreeR(childOffset + headSize, node);
        }

        lastSibling = checkFlag(n, BIT_IS_LAST_CHILD);
        if (!lastSibling) {
            // step over this node: header, value bytes, and the ID when present
            n += firstByteOffset + valueLen + (endsValue ? sizeOfId : 0);
        }
    } while (!lastSibling);
    return root;
}
int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of node's value for (p++; p < end && o < inpEnd; p++, o++) { // note matching start from [1] if (trieBytes[p] != inp[o]) { int comp = BytesUtil.compareByteUnsigned(trieBytes[p], inp[o]); if (comp < 0) { seq += BytesUtil.readUnsigned(trieBytes, n + sizeChildOffset, sizeNoValuesBeneath); break; } else if (comp < 0) { // try next child seq += BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath); if (checkFlag(c, BIT_IS_LAST_CHILD)) return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); } else { // children are ordered by their first value byte return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input
/**
 * Re-encodes each dictionary ID in {@code value} from the old dictionary of the raw
 * column to the new one.
 *
 * <p>Every element is decoded to its string through the old dictionary, encoded again
 * through the new dictionary (or mapped to the new dictionary's null ID when the string
 * is {@code null}), and reset in place to point at its slot in one newly allocated
 * buffer shared by all elements.
 *
 * @param value encoded IDs; elements are modified in place
 * @param measureDesc measure whose function identifies the raw column
 * @param oldDicts dictionaries the current IDs were encoded with, keyed by column
 * @param newDicts dictionaries to encode with, keyed by column
 * @return the same {@code value} list
 */
@Override
public List<ByteArray> reEncodeDictionary(List<ByteArray> value, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> oldDicts, Map<TblColRef, Dictionary<String>> newDicts) {
    TblColRef colRef = getRawColumn(measureDesc.getFunction());
    Dictionary<String> sourceDict = oldDicts.get(colRef);
    Dictionary<String> mergedDict = newDicts.get(colRef);

    // The ID width is the same for every element; read it once instead of three times per element.
    final int idSize = mergedDict.getSizeOfId();
    byte[] newIdBuf = new byte[value.size() * idSize];
    int bufOffset = 0;
    for (ByteArray c : value) {
        int oldId = BytesUtil.readUnsigned(c.array(), c.offset(), c.length());
        String v = sourceDict.getValueFromId(oldId);
        int newId = (v == null) ? mergedDict.nullId() : mergedDict.getIdFromValue(v);

        BytesUtil.writeUnsigned(newId, newIdBuf, bufOffset, idSize);
        c.reset(newIdBuf, bufOffset, idSize);
        bufOffset += idSize;
    }
    return value;
}
};
/**
 * Round-trip check for the unsigned read/write helpers of {@code BytesUtil}.
 *
 * <p>Writes {@code testInt} through the buffer-based {@code writeUnsigned}, reads it back
 * through the buffer-based {@code readUnsigned} from a copy of the bytes, and then checks
 * that the array-based {@code writeUnsigned} produces the same bytes.
 *
 * @param testInt value to write and read back
 * @param length number of bytes used for the encoding
 */
public void testWriteReadUnsignedInt(int testInt, int length) {
    ByteArray ba = new ByteArray(new byte[length]);
    BytesUtil.writeUnsigned(testInt, length, ba.asBuffer());

    byte[] newBytes = new byte[length];
    System.arraycopy(ba.array(), 0, newBytes, 0, length);
    int value = BytesUtil.readUnsigned(new ByteArray(newBytes).asBuffer(), length);
    // Expected value first, so a failure message reports the right "expected" and "actual".
    // NOTE(review): assumes assertEquals here is JUnit's (expected, actual) - confirm.
    assertEquals(testInt, value);

    byte[] anOtherNewBytes = new byte[length];
    BytesUtil.writeUnsigned(testInt, anOtherNewBytes, 0, length);
    assertTrue(Arrays.equals(anOtherNewBytes, ba.array()));
}
/** the reverse of dictIdToString(), returns integer ID */ public static int stringToDictId(String str) { try { byte[] bytes = str.getBytes("ISO-8859-1"); return BytesUtil.readUnsigned(bytes, 0, bytes.length); } catch (UnsupportedEncodingException e) { // never happen return 0; } } }
int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[useSplit].array(), splittedByteses[useSplit].offset(), splittedByteses[useSplit].length()); int idInMergedDict;