/**
 * Creates one ingester per measure descriptor.
 *
 * The returned array is filled in the iteration order of {@code measures}; each
 * element is produced by the single-measure {@code create} overload.
 *
 * @param measures the measure descriptors to create ingesters for
 * @return an array of {@code measures.size()} ingesters
 */
public static MeasureIngester<?>[] create(Collection<MeasureDesc> measures) {
    final MeasureIngester<?>[] ingesters = new MeasureIngester<?>[measures.size()];
    int slot = 0;
    for (MeasureDesc desc : measures) {
        ingesters[slot] = create(desc);
        slot++;
    }
    return ingesters;
}
/**
 * Builds the measure values for one input row.
 *
 * For each measure index, the column values are obtained from {@code kvBuilder}
 * and passed to the ingester at the same index, together with the matching
 * measure descriptor and the dictionary map.
 *
 * @param row the input row
 * @return an array of {@code measureCount} values, one per measure index
 */
private Object[] buildValue(String[] row) {
    final Object[] ingested = new Object[measureCount];
    for (int idx = 0; idx < measureCount; idx++) {
        final String[] columns = kvBuilder.buildValueOf(idx, row);
        final MeasureDesc desc = measureDescs[idx];
        ingested[idx] = measureIngesters[idx].valueOf(columns, desc, dictionaryMap);
    }
    return ingested;
}
}
/** * Re-encode with measures in Object[] format. * @param key * @param value * @return * @throws IOException */ public Pair<Text, Object[]> reEncode2(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } } return Pair.newPair(processKey(key), measureObjs); }
/**
 * Calls {@code reset()} on the aggregation ingester at every measure index of
 * {@code cubeDesc}.
 */
public void resetAggrs() {
    for (int idx = 0, total = cubeDesc.getMeasures().size(); idx < total; idx++) {
        aggrIngesters[idx].reset();
    }
}
}
/**
 * Calls {@code reset()} on the aggregation ingester at every measure index of
 * {@code cubeDesc}.
 */
public void resetAggrs() {
    for (int idx = 0, total = cubeDesc.getMeasures().size(); idx < total; idx++) {
        aggrIngesters[idx].reset();
    }
}
}
/**
 * Sets up the converter for the given cube.
 * <p>
 * The flat table descriptor is wrapped in a {@link CubeJoinedFlatTableEnrich} and
 * used to create the {@link KeyValueBuilder}. The cube's measures are copied into
 * an array, and one ingester is created per measure.
 *
 * @param cubeDesc      the cube descriptor supplying the measures
 * @param flatDesc      the flat table descriptor to enrich with {@code cubeDesc}
 * @param dictionaryMap the dictionaries, stored as given
 */
public InputConverterUnitForRawData(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDesc,
        Map<TblColRef, Dictionary<String>> dictionaryMap) {
    // Flat-table side.
    this.flatDesc = new CubeJoinedFlatTableEnrich(flatDesc, cubeDesc);

    // Measure side: count, descriptor snapshot and matching ingesters.
    this.measureCount = cubeDesc.getMeasures().size();
    this.measureDescs = cubeDesc.getMeasures().toArray(new MeasureDesc[this.measureCount]);
    this.measureIngesters = MeasureIngester.create(cubeDesc.getMeasures());

    this.dictionaryMap = dictionaryMap;
    this.kvBuilder = new KeyValueBuilder(this.flatDesc);
}
/**
 * Builds the measure objects for one flat-table row.
 *
 * For each measure index, the column values are obtained from {@code kvBuilder}
 * and passed to the aggregation ingester at the same index, together with the
 * matching measure descriptor and the dictionary map.
 *
 * @param flatRow the flat-table row
 * @return an array with one object per measure of {@code cubeDesc}
 */
public Object[] buildValueObjects(String[] flatRow) {
    final Object[] ingested = new Object[cubeDesc.getMeasures().size()];
    int idx = 0;
    while (idx < ingested.length) {
        final String[] columns = kvBuilder.buildValueOf(idx, flatRow);
        final MeasureDesc desc = measureDescList.get(idx);
        ingested[idx] = aggrIngesters[idx].valueOf(columns, desc, dictionaryMap);
        idx++;
    }
    return ingested;
}
/** * Re-encode with both dimension and measure in encoded (Text) format. * @param key * @param value * @return * @throws IOException */ public Pair<Text, Text> reEncode(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } ByteBuffer valueBuf = codec.encode(measureObjs); textValue.set(valueBuf.array(), 0, valueBuf.position()); return Pair.newPair(processKey(key), textValue); } else { return Pair.newPair(processKey(key), value); } }
/**
 * Stores the given configuration and descriptors, then creates the helpers derived
 * from them: a row key encoder for the cube's base cuboid, one ingester per
 * measure, a {@link BufferedMeasureCodec} over the measures and a
 * {@link KeyValueBuilder} over the flat table descriptor.
 *
 * @param kylinConfig           the configuration, stored as given
 * @param cubeDesc              the cube descriptor supplying the measures and the base cuboid
 * @param cubeSegment           the segment used to create the row key encoder
 * @param intermediateTableDesc the flat table descriptor used to create the key-value builder
 * @param dictionaryMap         the dictionaries, stored as given
 */
public BaseCuboidBuilder(KylinConfig kylinConfig, CubeDesc cubeDesc, CubeSegment cubeSegment,
        CubeJoinedFlatTableEnrich intermediateTableDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
    // Plain argument capture.
    this.kylinConfig = kylinConfig;
    this.cubeDesc = cubeDesc;
    this.cubeSegment = cubeSegment;
    this.intermediateTableDesc = intermediateTableDesc;
    this.dictionaryMap = dictionaryMap;

    // Derived helpers, created in the same order as before.
    this.rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, Cuboid.getBaseCuboid(cubeDesc));
    this.measureDescList = cubeDesc.getMeasures();
    this.aggrIngesters = MeasureIngester.create(this.measureDescList);
    this.measureCodec = new BufferedMeasureCodec(this.measureDescList);
    this.kvBuilder = new KeyValueBuilder(intermediateTableDesc);
}
/**
 * Feeds several column-value arrays to an {@code hllc(10)} ingester and checks the
 * resulting count estimate: arrays holding only nulls give 0, and arrays holding an
 * empty string or a non-empty string give 1.
 */
@Test
public void testIngest() {
    MeasureType<HLLCounter> mtype = (MeasureType<HLLCounter>) MeasureTypeFactory
            .create(HLLCMeasureType.FUNC_COUNT_DISTINCT, DataType.getType("hllc(10)"));
    MeasureIngester<HLLCounter> ingester = mtype.newIngester();

    // Each input row is paired with the estimate expected at the same index.
    String[][] inputs = { { null }, { null, null }, { "" }, { "", null }, { "abc" } };
    int[] expectedEstimates = { 0, 0, 1, 1, 1 };

    for (int c = 0; c < inputs.length; c++) {
        HLLCounter hllc = ingester.valueOf(inputs[c], null, null);
        assertEquals(expectedEstimates[c], hllc.getCountEstimate());
    }
}
}
/** * Re-encode with measures in Object[] format. * @param key * @param value * @return * @throws IOException */ public Pair<Text, Object[]> reEncode2(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } } return Pair.newPair(processKey(key), measureObjs); }
/**
 * Converts one flat-table row into a (row key, measure objects) tuple.
 * <p>
 * On the first call that finds {@code initialized} false, the
 * {@code baseCuboidBuilder} is created while holding the
 * {@code SparkCubingByLayer} class lock: the Kylin config is loaded from HDFS, set
 * as the thread-local config for the duration of the setup, and the cube, segment
 * and base cuboid are resolved from it. Every call then resets the builder's
 * aggregators and builds the key and values for {@code rowArray}.
 * <p>
 * NOTE(review): the first, unsynchronized read of {@code initialized} is only safe
 * if that field is declared {@code volatile} - confirm at its declaration.
 *
 * @param rowArray the flat-table row
 * @return a tuple of the row key bytes wrapped in a {@link ByteArray} and the measure objects
 * @throws Exception declared by this method; any failure during setup or building propagates
 */
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (!initialized) {
        synchronized (SparkCubingByLayer.class) {
            // Re-check under the lock: another thread may have finished the setup meanwhile.
            if (!initialized) {
                final KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig threadLocalScope = KylinConfig
                        .setAndUnsetThreadLocalConfig(config)) {
                    final CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
                    final CubeDesc desc = cube.getDescriptor();
                    final CubeSegment segment = cube.getSegmentById(segmentId);
                    final CubeJoinedFlatTableEnrich flatTable = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(segment), desc);
                    final long baseId = Cuboid.getBaseCuboidId(desc);
                    final Cuboid base = Cuboid.findForMandatory(desc, baseId);
                    baseCuboidBuilder = new BaseCuboidBuilder(config, desc, segment, flatTable,
                            AbstractRowKeyEncoder.createInstance(segment, base),
                            MeasureIngester.create(desc.getMeasures()), segment.buildDictionaryMap());
                    // Set last, so the flag is only raised once the builder is assigned.
                    initialized = true;
                }
            }
        }
    }

    baseCuboidBuilder.resetAggrs();
    final byte[] keyBytes = baseCuboidBuilder.buildKey(rowArray);
    final Object[] measureValues = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(keyBytes), measureValues);
}
}
/**
 * Ingests a 21-character value for an {@code extendedcolumn(20)} type, runs it
 * through serialize/deserialize, and expects the result to equal the first 20
 * characters encoded as UTF-8.
 */
@Test
public void testOverflow() {
    String text = StringUtils.repeat("h", 21);
    ExtendedColumnSerializer serializer = new ExtendedColumnSerializer(DataType.getType("extendedcolumn(20)"));
    MeasureIngester<ByteArray> ingester = measureType.newIngester();
    ByteArray array = ingester.valueOf(new String[] { null, text }, null, null);

    ByteBuffer buffer = ByteBuffer.allocate(serializer.maxLength());
    serializer.serialize(array, buffer);
    buffer.flip();
    ByteArray des = serializer.deserialize(buffer);

    // assertEquals performs the same expected.equals(actual) check as before,
    // but reports both values when the test fails.
    ByteArray expected = new ByteArray(StringUtils.repeat("h", 20).getBytes(StandardCharsets.UTF_8));
    Assert.assertEquals(expected, des);
}
}
/** * Re-encode with both dimension and measure in encoded (Text) format. * @param key * @param value * @return * @throws IOException */ public Pair<Text, Text> reEncode(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } ByteBuffer valueBuf = codec.encode(measureObjs); textValue.set(valueBuf.array(), 0, valueBuf.position()); return Pair.newPair(processKey(key), textValue); } else { return Pair.newPair(processKey(key), value); } }
/**
 * Creates one ingester per measure descriptor.
 *
 * The returned array is filled in the iteration order of {@code measures}; each
 * element is produced by the single-measure {@code create} overload.
 *
 * @param measures the measure descriptors to create ingesters for
 * @return an array of {@code measures.size()} ingesters
 */
public static MeasureIngester<?>[] create(Collection<MeasureDesc> measures) {
    final MeasureIngester<?>[] ingesters = new MeasureIngester<?>[measures.size()];
    int slot = 0;
    for (MeasureDesc desc : measures) {
        ingesters[slot] = create(desc);
        slot++;
    }
    return ingesters;
}
/**
 * Ingests a 20-character value for an {@code extendedcolumn(20)} type, runs it
 * through serialize/deserialize, and expects the result to equal the original
 * text encoded as UTF-8.
 */
@Test
public void testNormal() {
    String text = StringUtils.repeat("h", 20);
    ExtendedColumnSerializer serializer = new ExtendedColumnSerializer(DataType.getType("extendedcolumn(20)"));
    MeasureIngester<ByteArray> ingester = measureType.newIngester();
    ByteArray array = ingester.valueOf(new String[] { null, text }, null, null);

    ByteBuffer buffer = ByteBuffer.allocate(serializer.maxLength());
    serializer.serialize(array, buffer);
    buffer.flip();
    ByteArray des = serializer.deserialize(buffer);

    // assertEquals performs the same expected.equals(actual) check as before,
    // but reports both values when the test fails.
    ByteArray expected = new ByteArray(text.getBytes(StandardCharsets.UTF_8));
    Assert.assertEquals(expected, des);
}
// Replaces the measure at the index carried by `pair` with the value returned by
// that pair's ingester's reEncodeDictionary, called with the old and new dictionaries.
// NOTE(review): `pair` and `measureObjs` are defined outside this fragment -
// presumably an enclosing loop over the dictionary-backed measures; confirm.
int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts);
/**
 * Creates one ingester per measure descriptor.
 *
 * The returned array is filled in the iteration order of {@code measures}; each
 * element is produced by the single-measure {@code create} overload.
 *
 * @param measures the measure descriptors to create ingesters for
 * @return an array of {@code measures.size()} ingesters
 */
public static MeasureIngester<?>[] create(Collection<MeasureDesc> measures) {
    final MeasureIngester<?>[] ingesters = new MeasureIngester<?>[measures.size()];
    int slot = 0;
    for (MeasureDesc desc : measures) {
        ingesters[slot] = create(desc);
        slot++;
    }
    return ingesters;
}
/**
 * Ingests an all-null input for an {@code extendedcolumn(20)} type and checks
 * that the ingested value equals an empty {@link ByteArray}, that its serialized
 * form has a peeked length of 1, and that it deserializes back to an empty
 * {@link ByteArray}.
 */
@Test
public void testSerDesNull() {
    ExtendedColumnSerializer serializer = new ExtendedColumnSerializer(DataType.getType("extendedcolumn(20)"));
    MeasureIngester<ByteArray> ingester = measureType.newIngester();
    ByteArray array = ingester.valueOf(new String[] { null, null }, null, null);
    // assertEquals keeps the same equality checks as the former assertTrue calls,
    // but reports expected and actual values on failure.
    Assert.assertEquals(new ByteArray(), array);

    ByteBuffer buffer = ByteBuffer.allocate(serializer.maxLength());
    serializer.serialize(array, buffer);
    buffer.flip();

    int length = serializer.peekLength(buffer);
    Assert.assertEquals(1, length);

    ByteArray des = serializer.deserialize(buffer);
    Assert.assertEquals(new ByteArray(), des);
}
/**
 * Sets up the converter for the given cube.
 * <p>
 * The flat table descriptor is wrapped in a {@link CubeJoinedFlatTableEnrich} and
 * used to create the {@link KeyValueBuilder}. The cube's measures are copied into
 * an array, and one ingester is created per measure.
 *
 * @param cubeDesc      the cube descriptor supplying the measures
 * @param flatDesc      the flat table descriptor to enrich with {@code cubeDesc}
 * @param dictionaryMap the dictionaries, stored as given
 */
public InputConverterUnitForRawData(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDesc,
        Map<TblColRef, Dictionary<String>> dictionaryMap) {
    // Flat-table side.
    this.flatDesc = new CubeJoinedFlatTableEnrich(flatDesc, cubeDesc);

    // Measure side: count, descriptor snapshot and matching ingesters.
    this.measureCount = cubeDesc.getMeasures().size();
    this.measureDescs = cubeDesc.getMeasures().toArray(new MeasureDesc[this.measureCount]);
    this.measureIngesters = MeasureIngester.create(cubeDesc.getMeasures());

    this.dictionaryMap = dictionaryMap;
    this.kvBuilder = new KeyValueBuilder(this.flatDesc);
}