/**
 * Shorthand for {@code getIdFromValue(value, 0)}: forwards {@code value}
 * to the two-argument overload with {@code 0} as the second argument.
 *
 * @param value the value to look up
 * @return whatever the two-argument overload returns for {@code (value, 0)}
 */
final public int getIdFromValue(T value) {
    final int id = getIdFromValue(value, 0);
    return id;
}
public static Dictionary<?> buildDictionaryFromValueList(DictionaryInfo info, List<byte[]> values) { info.setCardinality(values.size()); Dictionary dict = null; int baseId = 0; // always 0 for now int nSamples = 5; ArrayList samples = new ArrayList(); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(values, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(values, baseId, nSamples, samples); else dict = buildStringDict(values, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality " + info.getCardinality()); if (values.size() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + values.size()); return dict; }
/**
 * Stores a string value into column {@code col} of the current record.
 *
 * <p>For a non-metric column, the string is translated to its dictionary
 * id via {@code info.dict(col)} and stored with
 * {@code rawRecord.setValueID}. For a metric column, the string is
 * converted by the column's codec ({@code rawRecord.codec(col).valueOf})
 * and passed on to {@code setValueMetrics}.
 *
 * @param col   index of the column to set
 * @param value string form of the value
 */
public void setValueString(int col, String value) {
    if (!rawRecord.isMetric(col)) {
        // Non-metric column: store the dictionary id of the value.
        final int dictId = info.dict(col).getIdFromValue(value);
        rawRecord.setValueID(col, dictId);
        return;
    }

    // Metric column: let the column codec turn the string into a LongWritable.
    final LongWritable metric = rawRecord.codec(col).valueOf(value);
    setValueMetrics(col, metric);
}