public static CoprocessorRowType fromTableRecordInfo(TableRecordInfo tableRecordInfo, List<TblColRef> cols) {
    int[] colSizes = new int[cols.size()];
    for (int i = 0; i < cols.size(); i++) {
        colSizes[i] = tableRecordInfo.getDigest().length(i);
    }
    return new CoprocessorRowType(cols.toArray(new TblColRef[cols.size()]), colSizes);
}
public static CoprocessorProjector makeForEndpoint(final TableRecordInfo tableInfo, final Collection<TblColRef> groupby) {
    byte[] mask = new byte[tableInfo.getDigest().getByteFormLen()];
    int maskIdx = 0;
    for (int i = 0; i < tableInfo.getDigest().getColumnCount(); ++i) {
        // every byte of a group-by column is marked 0xff, all other bytes 0x00
        TblColRef tblColRef = tableInfo.getColumns().get(i);
        int length = tableInfo.getDigest().length(i);
        byte bits = groupby.contains(tblColRef) ? (byte) 0xff : 0x00;
        for (int j = 0; j < length; ++j) {
            mask[maskIdx++] = bits;
        }
    }
    return new CoprocessorProjector(mask);
}
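For intuition, here is a minimal, self-contained sketch (plain Java, no Kylin types) of how a byte-level group-by mask like the one built above can be applied to fixed-width row keys; the class and helper names (GroupByMaskSketch, applyMask) are illustrative, not part of Kylin.

import java.util.Arrays;

public class GroupByMaskSketch {
    // Apply the mask: bytes of non-group-by columns are zeroed, so rows that
    // share the same group-by values produce identical masked keys.
    static byte[] applyMask(byte[] rowKey, byte[] mask) {
        byte[] out = new byte[rowKey.length];
        for (int i = 0; i < rowKey.length; i++) {
            out[i] = (byte) (rowKey[i] & mask[i]);
        }
        return out;
    }

    public static void main(String[] args) {
        // Two fixed-width columns of 2 bytes each; only the first is in the group-by.
        byte[] mask = { (byte) 0xff, (byte) 0xff, 0x00, 0x00 };
        byte[] rowA = { 0x01, 0x02, 0x10, 0x20 };
        byte[] rowB = { 0x01, 0x02, 0x30, 0x40 };
        // Same group-by column value => same masked key, so the rows aggregate together.
        System.out.println(Arrays.equals(applyMask(rowA, mask), applyMask(rowB, mask))); // true
    }
}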
public SliceBuilder(TableRecordInfo info, short shard) {
    this.info = info;
    this.nColumns = info.getDigest().getColumnCount();
    this.nRecordsCap = Math.max(1, info.getDescriptor().getSliceSize());
    this.shard = shard;
    this.sliceTimestamp = Long.MIN_VALUE;
    this.nRecords = 0;
    this.containers = null;
    doneSlice(); // init containers
}
@Override
public int getColumnLength(TblColRef col) {
    int index = getTableRecordInfo().findColumn(col);
    return getTableRecordInfo().getDigest().length(index);
}
private Slice doneSlice() {
    Slice r = null;
    if (nRecords > 0) {
        for (int i = 0; i < nColumns; i++) {
            containers[i].closeForChange();
        }
        r = new Slice(info.getDigest(), shard, sliceTimestamp, containers);
    }

    // reset for next slice
    nRecords = 0;
    containers = new ColumnValueContainer[nColumns];
    for (int i : info.getDescriptor().getBitmapColumns()) {
        containers[i] = new BitMapContainer(info.getDigest(), i);
    }
    for (int i : info.getDescriptor().getValueColumns()) {
        containers[i] = new CompressedValueContainer(info.getDigest(), i, nRecordsCap);
    }
    for (int i : info.getDescriptor().getMetricsColumns()) {
        containers[i] = new CompressedValueContainer(info.getDigest(), i, nRecordsCap);
    }
    return r;
}
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    IIManager mgr = IIManager.getInstance(config);
    IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
    IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);

    info = new TableRecordInfo(seg);
    rec = info.createTableRecord();
    builder = null;
    kv = new IIKeyValueCodec(info.getDigest());
}
public static void main(String[] args) throws IOException {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    IIManager mgr = IIManager.getInstance(KylinConfig.getInstanceFromEnv());

    String iiName = args[0];
    IIInstance ii = mgr.getII(iiName);
    String path = args[1];
    System.out.println("Reading from " + path + " ...");

    TableRecordInfo info = new TableRecordInfo(ii.getFirstSegment());
    IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());
    int count = 0;
    for (Slice slice : codec.decodeKeyValue(readSequenceKVs(hconf, path))) {
        for (RawTableRecord rec : slice) {
            // use println, not printf: the record text is data, not a format string
            System.out.println(new TableRecord(rec, info).toString());
            count++;
        }
    }
    System.out.println("Total " + count + " records");
}
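The readSequenceKVs helper is referenced above but not shown. A minimal sketch of what such a reader could look like follows, assuming the SequenceFile stores ImmutableBytesWritable key/value pairs and that HBase's Pair class is the pair type the codec expects; both assumptions may differ from the actual helper.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.SequenceFile;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SequenceKVReaderSketch {
    // Read all (key, value) pairs of ImmutableBytesWritable from a SequenceFile.
    // Illustrative only; the real helper may stream lazily or use a different Pair type.
    static List<Pair<ImmutableBytesWritable, ImmutableBytesWritable>> readSequenceKVs(Configuration conf, String path) throws IOException {
        List<Pair<ImmutableBytesWritable, ImmutableBytesWritable>> result = new ArrayList<>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(path)))) {
            ImmutableBytesWritable key = new ImmutableBytesWritable();
            ImmutableBytesWritable value = new ImmutableBytesWritable();
            while (reader.next(key, value)) {
                // Copy the bytes: the reader reuses the writable instances on each call.
                result.add(new Pair<>(new ImmutableBytesWritable(key.copyBytes()), new ImmutableBytesWritable(value.copyBytes())));
            }
        }
        return result;
    }
}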
public static EndpointAggregators fromFunctions(TableRecordInfo tableInfo, List<FunctionDesc> metrics) {
    String[] funcNames = new String[metrics.size()];
    String[] dataTypes = new String[metrics.size()];
    MetricInfo[] metricInfos = new MetricInfo[metrics.size()];

    for (int i = 0; i < metrics.size(); i++) {
        FunctionDesc functionDesc = metrics.get(i);

        // TODO: what if functionDesc's type differs from the table desc? That could cause a scale difference.
        funcNames[i] = functionDesc.getExpression();
        dataTypes[i] = functionDesc.getReturnType();

        if (functionDesc.isCount()) {
            metricInfos[i] = new MetricInfo(MetricType.Count);
        } else if (functionDesc.isDimensionAsMetric()) {
            metricInfos[i] = new MetricInfo(MetricType.DimensionAsMetric);
        } else {
            int index = tableInfo.findFactTableColumn(functionDesc.getParameter().getValue());
            if (index < 0) {
                throw new IllegalStateException("Column " + functionDesc.getParameter().getValue() + " is not found in II");
            }
            if (functionDesc.isCountDistinct()) {
                metricInfos[i] = new MetricInfo(MetricType.DistinctCount, index, functionDesc.getReturnDataType().getPrecision());
            } else {
                metricInfos[i] = new MetricInfo(MetricType.Normal, index);
            }
        }
    }
    return new EndpointAggregators(funcNames, dataTypes, metricInfos, tableInfo.getDigest());
}
@Test
public void testCodec() throws IOException {
    List<TableRecord> records = loadRecordsSorted();
    System.out.println(records.size() + " records");
    List<Slice> slices = buildTimeSlices(records);
    System.out.println(slices.size() + " slices");

    IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());
    List<Pair<ImmutableBytesWritable, ImmutableBytesWritable>> kvs = encodeKVs(codec, slices);
    System.out.println(kvs.size() + " KV pairs");

    List<Slice> slicesCopy = decodeKVs(codec, kvs);
    assertEquals(slices, slicesCopy);

    List<TableRecord> recordsCopy = iterateRecords(slicesCopy);
    assertEquals(new HashSet<TableRecord>(records), new HashSet<TableRecord>(recordsCopy));
    dump(recordsCopy);
}
@Test
public void testLoad() throws Exception {
    String tableName = seg.getStorageLocationIdentifier();
    IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());

    List<Slice> slices = Lists.newArrayList();
    HBaseClientKVIterator kvIterator = new HBaseClientKVIterator(hconn, tableName, IIDesc.HBASE_FAMILY_BYTES, IIDesc.HBASE_QUALIFIER_BYTES);
    try {
        for (Slice slice : codec.decodeKeyValue(kvIterator)) {
            slices.add(slice);
        }
    } finally {
        kvIterator.close();
    }

    List<TableRecord> records = iterateRecords(slices);
    dump(records);
    System.out.println(records.size() + " records");
}
@Test
public void testSerialize() {
    byte[] x = TableRecordInfoDigest.serialize(this.tableRecordInfo.getDigest());
    TableRecordInfoDigest d = TableRecordInfoDigest.deserialize(x);
    assertEquals(25, d.getColumnCount()); // JUnit convention: expected value first
}
@Test
public void testBitMapContainer() {
    // create container
    BitMapContainer container = new BitMapContainer(info.getDigest(), 0);
    Dictionary<String> dict = info.dict(0);
    for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) {
        container.append(v);
    }
    container.append(Dictionary.NULL_ID[dict.getSizeOfId()]);
    container.closeForChange();

    // copy by serialization
    List<ImmutableBytesWritable> bytes = container.toBytes();
    BitMapContainer container2 = new BitMapContainer(info.getDigest(), 0);
    container2.fromBytes(bytes);

    // check the copy
    int i = 0;
    for (int v = dict.getMinId(); v <= dict.getMaxId(); v++) {
        int value = container2.getValueIntAt(i++);
        assertEquals(v, value);
    }
    assertEquals(Dictionary.NULL_ID[dict.getSizeOfId()], container2.getValueIntAt(i++));
    assertEquals(container, container2);
}
private Slice build(List<List<String>> table, final TableRecordInfo tableRecordInfo, Dictionary<?>[] localDictionary) {
    final Slice slice = sliceMaker.makeSlice(tableRecordInfo.getDigest(), Lists.transform(table, new Function<List<String>, TableRecord>() {
        @Nullable
        @Override
        public TableRecord apply(@Nullable List<String> input) {
            TableRecord result = tableRecordInfo.createTableRecord();
            for (int i = 0; i < input.size(); i++) {
                result.setValueString(i, input.get(i));
            }
            return result;
        }
    }));
    slice.setLocalDictionaries(localDictionary);
    return slice;
}
public IncrementalSliceMaker(TableRecordInfo info, short shard) {
    this.info = info;
    this.nColumns = info.getDigest().getColumnCount();
    this.nRecordsCap = Math.max(1, info.getDescriptor().getSliceSize());
    this.shard = shard;
    this.sliceTimestamp = Long.MIN_VALUE;
    this.nRecords = 0;
    this.containers = null;
    doneSlice(); // init containers
}
private Slice doneSlice() {
    Slice r = null;
    if (nRecords > 0) {
        for (int i = 0; i < nColumns; i++) {
            containers[i].closeForChange();
        }
        r = new Slice(info.getDigest(), shard, sliceTimestamp, containers);
    }

    // reset for next slice
    nRecords = 0;
    containers = new ColumnValueContainer[nColumns];
    // for (int i : info.getDescriptor().getBitmapColumns()) {
    //     containers[i] = new CompressedValueContainer(info.getDigest(), i, nRecordsCap);
    // }
    for (int i : info.getDescriptor().getValueColumns()) {
        containers[i] = new CompressedValueContainer(info.getDigest(), i, nRecordsCap);
    }
    for (int i : info.getDescriptor().getMetricsColumns()) {
        containers[i] = new CompressedValueContainer(info.getDigest(), i, nRecordsCap);
    }
    return r;
}