/**
 * Tells whether the given ID is the NULL ID of this dictionary.
 * <p>
 * The NULL pattern is looked up in {@code NULL_ID} by the ID width returned
 * from {@code getSizeOfId()}; the ID counts as NULL when every bit of that
 * pattern is also set in {@code id}.
 *
 * @param id the ID integer to test
 * @return true if all bits of the NULL pattern are set in {@code id}
 */
public boolean isNullId(int id) {
    final int nullPattern = NULL_ID[getSizeOfId()];
    final int overlap = id & nullPattern;
    return overlap == nullPattern;
}
/**
 * Shorthand for <code>getIdFromValueBytes(value, offset, len, 0)</code>,
 * i.e. the lookup is performed with a rounding flag of 0.
 */
final public int getIdFromValueBytes(byte[] value, int offset, int len) {
    final int noRounding = 0;
    return getIdFromValueBytes(value, offset, len, noRounding);
}
/**
 * Builds a new dictionary for {@code targetInfo} from the union of all values
 * held by the given source dictionaries.
 * <p>
 * Every ID between {@code getMinId()} and {@code getMaxId()} of each source
 * dictionary is read back as raw value bytes, duplicates are removed, and the
 * distinct values are handed to {@code buildDictionaryFromValueList}.
 *
 * @param targetInfo  descriptor passed through to {@code buildDictionaryFromValueList}
 * @param sourceDicts descriptors whose dictionary objects supply the values
 * @return the dictionary built from the distinct values
 */
public static Dictionary mergeDictionaries(DictionaryInfo targetInfo, List<DictionaryInfo> sourceDicts) {
    // byte[] inherits identity-based equals()/hashCode() from Object, so a
    // HashSet<byte[]> never collapses equal values. ByteBuffer compares by
    // content, which gives real de-duplication.
    HashSet<java.nio.ByteBuffer> dedup = new HashSet<java.nio.ByteBuffer>();

    for (DictionaryInfo info : sourceDicts) {
        Dictionary<?> dict = info.getDictionaryObject();
        int minkey = dict.getMinId();
        int maxkey = dict.getMaxId();
        // scratch buffer reused for every value of this dictionary
        byte[] buffer = new byte[dict.getSizeOfValue()];

        for (int i = minkey; i <= maxkey; ++i) {
            int size = dict.getValueBytesFromId(i, buffer, 0);
            // the copy detaches the value from the reused scratch buffer
            // (as the original code already relied on)
            dedup.add(java.nio.ByteBuffer.wrap(Bytes.copy(buffer, 0, size)));
        }
    }

    List<byte[]> valueList = new ArrayList<byte[]>(dedup.size());
    for (java.nio.ByteBuffer unique : dedup) {
        // each buffer wraps its own private copy, so array() is exactly the value
        valueList.add(unique.array());
    }

    return buildDictionaryFromValueList(targetInfo, valueList);
}
/**
 * Lower level lookup: resolves raw value bytes to their ID integer.
 * <p>
 * When the value is not found:
 * <ul>
 * <li>roundingFlag=0 - an IllegalArgumentException is thrown;</li>
 * <li>roundingFlag&lt;0 - the closest smaller ID integer is returned, if one exists;</li>
 * <li>roundingFlag&gt;0 - the closest bigger ID integer is returned, if one exists.</li>
 * </ul>
 * This path bypasses the cache layer and could therefore be significantly
 * slower than getIdFromValue(T value).
 *
 * @throws IllegalArgumentException
 *             if value is not found in dictionary and rounding is off or
 *             failed
 */
final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) {
    // the NULL byte form short-circuits to the NULL ID without a real lookup
    final boolean nullForm = isNullByteForm(value, offset, len);
    return nullForm ? nullId() : getIdFromValueBytesImpl(value, offset, len, roundingFlag);
}
/**
 * Resolves a value object to its ID integer.
 * <p>
 * When the value is not found:
 * <ul>
 * <li>roundingFlag=0 - an IllegalArgumentException is thrown;</li>
 * <li>roundingFlag&lt;0 - the closest smaller ID integer is returned, if one exists;</li>
 * <li>roundingFlag&gt;0 - the closest bigger ID integer is returned, if one exists.</li>
 * </ul>
 * The implementation often has a cache, which makes this faster than the
 * byte[] version getIdFromValueBytes().
 *
 * @throws IllegalArgumentException
 *             if value is not found in dictionary and rounding is off or
 *             failed
 */
final public int getIdFromValue(T value, int roundingFlag) {
    // the NULL object form maps straight to the NULL ID
    if (isNullObjectForm(value)) {
        return nullId();
    }
    return getIdFromValueImpl(value, roundingFlag);
}
Dictionary<?> mergedDict = dictMgr.getDictionary(mergedCubeSegment.getDictResPath(col)); while (sourceDict.getSizeOfValue() > newKeyBuf.length - bufOffset || mergedDict.getSizeOfValue() > newKeyBuf.length - bufOffset) { byte[] oldBuf = newKeyBuf; newKeyBuf = new byte[2 * newKeyBuf.length]; int idInMergedDict; int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBuf, bufOffset); if (size < 0) { idInMergedDict = mergedDict.nullId(); } else { idInMergedDict = mergedDict.getIdFromValueBytes(newKeyBuf, bufOffset, size); BytesUtil.writeUnsigned(idInMergedDict, newKeyBuf, bufOffset, mergedDict.getSizeOfId()); bufOffset += mergedDict.getSizeOfId(); } else {
@Test public void testBitMapContainer() { // create container BitMapContainer container = new BitMapContainer(info.getDigest(), 0); Dictionary<String> dict = info.dict(0); for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) { container.append(v); } container.append(Dictionary.NULL_ID[dict.getSizeOfId()]); container.closeForChange(); // copy by serialization List<ImmutableBytesWritable> bytes = container.toBytes(); BitMapContainer container2 = new BitMapContainer(info.getDigest(), 0); container2.fromBytes(bytes); // check the copy int i = 0; for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) { int value = container2.getValueIntAt(i++); assertEquals(v, value); } assertEquals(Dictionary.NULL_ID[dict.getSizeOfId()], container2.getValueIntAt(i++)); assertEquals(container, container2); }
public void writeColumn(TblColRef column, byte[] value, int valueLen, int roundingFlag, byte dft, byte[] output, int outputOffset) { Dictionary<String> dict = getDictionary(column); int columnLen = getColumnLength(column); // non-dict value if (dict == null) { byte[] valueBytes = padFixLen(columnLen, value); System.arraycopy(valueBytes, 0, output, outputOffset, columnLen); return; } // dict value try { int id = dict.getIdFromValueBytes(value, 0, valueLen, roundingFlag); BytesUtil.writeUnsigned(id, output, outputOffset, dict.getSizeOfId()); } catch (IllegalArgumentException ex) { for (int i = outputOffset; i < outputOffset + columnLen; i++) output[i] = dft; logger.error("Can't translate value " + Bytes.toString(value, 0, valueLen) + " to dictionary ID, roundingFlag " + roundingFlag + ". Using default value " + String.format("\\x%02X", dft)); } }
private TableRecordInfoDigest createDigest() { // isMetric boolean[] isMetric = new boolean[nColumns]; for (int i = 0; i < nColumns; ++i) { isMetric[i] = desc.isMetricsCol(i); } // lengths int[] lengths = new int[nColumns]; for (int i = 0; i < nColumns; ++i) { lengths[i] = isMetric[i] ? measureSerializers[i].getLength() : dictionaries[i].getSizeOfId(); } // dict max id int[] dictMaxIds = new int[nColumns]; for (int i = 0; i < nColumns; ++i) { if (!isMetric[i]) dictMaxIds[i] = dictionaries[i].getMaxId(); } // offsets int pos = 0; int[] offsets = new int[nColumns]; for (int i = 0; i < nColumns; i++) { offsets[i] = pos; pos += lengths[i]; } int byteFormLen = pos; return new TableRecordInfoDigest(nColumns, byteFormLen, offsets, dictMaxIds, lengths, isMetric, measureSerializers); }
/**
 * Returns the string value of a column that belongs to the original table
 * columns, i.e. columns like min_xx, max_yy will never appear.
 * <p>
 * Metric columns are rendered through getValueMetric(); all other columns are
 * decoded from their value ID via the column's dictionary.
 */
public String getValueString(int col) {
    if (!rawRecord.isMetric(col)) {
        final int valueId = rawRecord.getValueID(col);
        return info.dict(col).getValueFromId(valueId);
    }
    return getValueMetric(col);
}
/**
 * Produces the string form of an ID whose every byte is Dictionary.NULL,
 * sized to the column's length as reported by columnIO.
 */
private String nullString(TblColRef column) {
    final int width = columnIO.getColumnLength(column);
    final byte[] nullBytes = new byte[width];
    int pos = 0;
    while (pos < width) {
        nullBytes[pos] = Dictionary.NULL;
        pos++;
    }
    return Dictionary.dictIdToString(nullBytes, 0, width);
}
/**
 * Shorthand for <code>getIdFromValue(value, 0)</code>, i.e. the lookup is
 * performed with a rounding flag of 0.
 */
final public int getIdFromValue(T value) {
    final int noRounding = 0;
    return getIdFromValue(value, noRounding);
}
Dictionary<String> dict = info.dict(0); byte[] buf = new byte[dict.getSizeOfId()]; ImmutableBytesWritable bytes = new ImmutableBytesWritable(buf); for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) { BytesUtil.writeUnsigned(v, buf, 0, dict.getSizeOfId()); container.append(bytes); BytesUtil.writeUnsigned(Dictionary.NULL_ID[dict.getSizeOfId()], buf, 0, dict.getSizeOfId()); container.append(bytes); container.closeForChange(); for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) { container2.getValueAt(i++, bytes); int value = BytesUtil.readUnsigned(bytes.get(), bytes.getOffset(), int value = BytesUtil.readUnsigned(bytes.get(), bytes.getOffset(), bytes.getLength()); assertEquals(Dictionary.NULL_ID[dict.getSizeOfId()], value); assertEquals(container, container2);
/**
 * Decodes a column's stored bytes back into its string value.
 * <p>
 * With a dictionary, the first {@code bytesLen} bytes are read as an unsigned
 * ID and resolved through the dictionary; if the dictionary rejects the ID
 * with IllegalArgumentException, the failure is logged and an empty string is
 * returned. Without a dictionary, the first {@code bytesLen} bytes are taken,
 * {@code null} is returned if isNull() accepts them, and otherwise the
 * fixed-length padding is removed before conversion to a string.
 *
 * @param col      the column being read
 * @param bytes    buffer holding the encoded column
 * @param bytesLen number of leading bytes of {@code bytes} that belong to the column
 * @return the decoded value, {@code null} for a null plain value, or "" when
 *         the dictionary lookup fails
 */
public String readColumnString(TblColRef col, byte[] bytes, int bytesLen) {
    final Dictionary<String> dict = getDictionary(col);

    if (dict != null) {
        // dictionary-encoded column
        final int id = BytesUtil.readUnsigned(bytes, 0, bytesLen);
        try {
            return dict.getValueFromId(id);
        } catch (IllegalArgumentException e) {
            logger.error("Can't get dictionary value for column " + col.getName() + " (id = " + id + ")");
            return "";
        }
    }

    // plain column
    final byte[] head = Bytes.head(bytes, bytesLen);
    if (isNull(head)) {
        return null;
    }
    final byte[] unpadded = removeFixLenPad(head, 0);
    return Bytes.toString(unpadded);
}
/**
 * Renders an ID as the string form of a single byte: {@code id} is narrowed
 * to one byte and passed to Dictionary.dictIdToString.
 */
public static String idToStr(int id) {
    final byte[] single = new byte[1];
    single[0] = (byte) id;
    return Dictionary.dictIdToString(single, 0, single.length);
}
public static Dictionary<?> buildDictionaryFromValueList(DictionaryInfo info, List<byte[]> values) { info.setCardinality(values.size()); Dictionary dict = null; int baseId = 0; // always 0 for now int nSamples = 5; ArrayList samples = new ArrayList(); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(values, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(values, baseId, nSamples, samples); else dict = buildStringDict(values, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality " + info.getCardinality()); if (values.size() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + values.size()); return dict; }
/**
 * Returns the NULL ID for this dictionary, i.e. the entry of {@code NULL_ID}
 * selected by the ID width from {@code getSizeOfId()}.
 */
public int nullId() {
    final int idWidth = getSizeOfId();
    return NULL_ID[idWidth];
}
/**
 * Reads the value of every ID from 0 up to (but excluding) the dictionary's
 * cardinality and asserts that the number of distinct values equals that
 * cardinality.
 */
@SuppressWarnings("unchecked")
private void touchDictValues(DictionaryInfo info1) {
    Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject();
    HashSet<String> distinct = new HashSet<String>();

    final int cardinality = info1.getCardinality();
    int id = 0;
    while (id < cardinality) {
        String decoded = dict.getValueFromId(id);
        distinct.add(decoded);
        id++;
    }

    assertEquals(info1.getCardinality(), distinct.size());
}
}
/**
 * Encodes a string value of the given column through columnIO.writeColumn and
 * returns the string form of the resulting ID bytes. Dictionary.NULL is passed
 * as the default filler byte.
 *
 * @param column       the column the value belongs to
 * @param v            the value to translate
 * @param roundingFlag rounding mode forwarded to columnIO.writeColumn
 */
private String translate(TblColRef column, String v, int roundingFlag) {
    final byte[] raw = Bytes.toBytes(v);
    final int width = columnIO.getColumnLength(column);
    final byte[] encoded = new byte[width];
    columnIO.writeColumn(column, raw, raw.length, roundingFlag, Dictionary.NULL, encoded, 0);
    return Dictionary.dictIdToString(encoded, 0, encoded.length);
}
}
/**
 * Stores a string value into the given column.
 * <p>
 * Metric columns parse the string with the column's codec and store the
 * result via setValueMetrics(); all other columns translate the string to its
 * dictionary ID and store that ID in the raw record.
 */
public void setValueString(int col, String value) {
    if (!rawRecord.isMetric(col)) {
        final int valueId = info.dict(col).getIdFromValue(value);
        rawRecord.setValueID(col, valueId);
        return;
    }
    final LongWritable parsed = rawRecord.codec(col).valueOf(value);
    setValueMetrics(col, parsed);
}