SplitMerger() {
    // Allocate the aggregator set and the reusable metrics scratch array once,
    // both sized to the cube's measure count.
    List<MeasureDesc> measures = cubeDesc.getMeasures();
    reuseAggrs = new MeasureAggregators(measures);
    reuseMetricsArray = new Object[measures.size()];
}
@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    // Aggregate every encoded measure tuple sharing this key, then emit one merged tuple.
    aggs.reset();
    for (Text rawValue : values) {
        // Periodic progress logging; vcounter counts values seen, not key/value pairs.
        if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
            logger.info("Handling value with ordinal (This is not KV number!): " + vcounter);
        }
        codec.decode(ByteBuffer.wrap(rawValue.getBytes(), 0, rawValue.getLength()), input);
        aggs.aggregate(input, needAggrMeasures);
    }
    aggs.collectStates(result);
    // Re-encode the merged measure states and write them out under the original key.
    ByteBuffer encodedBuf = codec.encode(result);
    outputValue.set(encodedBuf.array(), 0, encodedBuf.position());
    context.write(key, outputValue);
}
}
/**
 * Spills the in-memory aggregation buffer into {@code spillBuffer}.
 * Serialized layout: entry count, then per entry: key length, key bytes,
 * encoded-metrics length, encoded-metrics bytes. {@code buffMap} is released
 * (set to null) even if serialization fails, so memory is reclaimed.
 *
 * Fix: the original log statement dereferenced {@code buffMap.size()} BEFORE the
 * {@code buffMap != null} check, so a flush after the map was released would NPE;
 * the log is now guarded by the same null check.
 *
 * @throws IOException if writing an entry to the in-memory stream fails
 */
public void flush() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(MAX_BUFFER_SIZE);
    if (buffMap != null) {
        logger.info("AggregationCache(size={} est_mem_size={} threshold={}) will spill to {}",
                buffMap.size(), estMemSize, spillThreshold, dumpedFile.getAbsolutePath());
        DataOutputStream bos = new DataOutputStream(baos);
        try {
            bos.writeInt(buffMap.size());
            for (Entry<byte[], MeasureAggregator[]> entry : buffMap.entrySet()) {
                // Collect aggregator states into a plain Object[] so the codec can encode them.
                MeasureAggregators aggs = new MeasureAggregators(entry.getValue());
                Object[] aggrResult = new Object[metrics.trueBitCount()];
                aggs.collectStates(aggrResult);
                ByteBuffer metricsBuf = measureCodec.encode(aggrResult);
                bos.writeInt(entry.getKey().length);
                bos.write(entry.getKey());
                bos.writeInt(metricsBuf.position());
                bos.write(metricsBuf.array(), 0, metricsBuf.position());
            }
        } finally {
            // Release the map even on failure; closing flushes the wrapped stream.
            buffMap = null;
            IOUtils.closeQuietly(bos);
        }
    }
    spillBuffer = baos.toByteArray();
    IOUtils.closeQuietly(baos);
    logger.info("Accurately spill data size = {}", spillBuffer.length);
}
@Override
public Object[] call(Object[] input1, Object[] input2) throws Exception {
    // Fold the two measure tuples into a freshly allocated result array.
    final Object[] merged = new Object[input1.length];
    aggregators.aggregate(input1, input2, merged);
    return merged;
}
};
@Override public Entry<byte[], MeasureAggregator[]> next() { // Use minimum heap to merge sort the keys, // also do aggregation for measures with same keys in different dumps resultAggrs.reset(); byte[] peekKey = minHeap.peek().getKey(); internalAggregate(); while (!minHeap.isEmpty() && bytesComparator.compare(peekKey, minHeap.peek().getKey()) == 0) { internalAggregate(); } return new SimpleEntry(peekKey, resultMeasureAggregators); }
private void internalAggregate() {
    // Pop the smallest heap entry, fold that dump's current values into the
    // running aggregation, then refill the heap from the same dump.
    Entry<byte[], Integer> head = minHeap.poll();
    Integer dumpIndex = head.getValue();
    resultAggrs.aggregate(dumpCurrentValues.get(dumpIndex));
    enqueueFromDump(dumpIndex);
}
/**
 * Spills the in-memory aggregation buffer into {@code spillBuffer}.
 * Serialized layout: entry count, then per entry: key length, key bytes,
 * encoded-metrics length, encoded-metrics bytes. {@code buffMap} is released
 * (set to null) even if serialization fails, so memory is reclaimed.
 *
 * Fix: the original log statement dereferenced {@code buffMap.size()} BEFORE the
 * {@code buffMap != null} check, so a flush after the map was released would NPE;
 * the log is now guarded by the same null check.
 *
 * @throws IOException if writing an entry to the in-memory stream fails
 */
public void flush() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(MAX_BUFFER_SIZE);
    if (buffMap != null) {
        logger.info("AggregationCache(size={} est_mem_size={} threshold={}) will spill to {}",
                buffMap.size(), estMemSize, spillThreshold, dumpedFile.getAbsolutePath());
        DataOutputStream bos = new DataOutputStream(baos);
        try {
            bos.writeInt(buffMap.size());
            for (Entry<byte[], MeasureAggregator[]> entry : buffMap.entrySet()) {
                // Collect aggregator states into a plain Object[] so the codec can encode them.
                MeasureAggregators aggs = new MeasureAggregators(entry.getValue());
                Object[] aggrResult = new Object[metrics.trueBitCount()];
                aggs.collectStates(aggrResult);
                ByteBuffer metricsBuf = measureCodec.encode(aggrResult);
                bos.writeInt(entry.getKey().length);
                bos.write(entry.getKey());
                bos.writeInt(metricsBuf.position());
                bos.write(metricsBuf.array(), 0, metricsBuf.position());
            }
        } finally {
            // Release the map even on failure; closing flushes the wrapped stream.
            buffMap = null;
            IOUtils.closeQuietly(bos);
        }
    }
    spillBuffer = baos.toByteArray();
    IOUtils.closeQuietly(baos);
    logger.info("Accurately spill data size = {}", spillBuffer.length);
}
@Override public Entry<byte[], MeasureAggregator[]> next() { // Use minimum heap to merge sort the keys, // also do aggregation for measures with same keys in different dumps resultAggrs.reset(); byte[] peekKey = minHeap.peek().getKey(); internalAggregate(); while (!minHeap.isEmpty() && bytesComparator.compare(peekKey, minHeap.peek().getKey()) == 0) { internalAggregate(); } return new SimpleEntry(peekKey, resultMeasureAggregators); }
@Override public void doReduce(ByteArrayWritable key, Iterable<ByteArrayWritable> values, Context context) throws IOException, InterruptedException { aggs.reset(); for (ByteArrayWritable value : values) { if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) { logger.info("Handling value with ordinal (This is not KV number!): " + vcounter); } codec.decode(value.asBuffer(), input); aggs.aggregate(input); } aggs.collectStates(result); // output key outputKey.set(key.array(), key.offset(), key.length()); // output value ByteBuffer valueBuf = codec.encode(result); outputValue.set(valueBuf.array(), 0, valueBuf.position()); context.write(outputKey, outputValue); }
Merger() {
    // Allocate the aggregator set and the reusable metrics scratch array once,
    // both sized to the cube's measure count.
    List<MeasureDesc> measures = cubeDesc.getMeasures();
    reuseAggrs = new MeasureAggregators(measures);
    reuseMetricsArray = new Object[measures.size()];
}
/**
 * Aggregates two measure tuples into a new array, lazily initializing the
 * shared aggregator state on first use (executor-side lazy init).
 *
 * Fix: replaced the non-idiomatic {@code initialized == false} comparisons
 * with {@code !initialized}.
 *
 * NOTE(review): double-checked locking is only safe if {@code initialized} is
 * declared volatile — the field declaration is outside this view; confirm.
 */
@Override
public Object[] call(Object[] input1, Object[] input2) throws Exception {
    if (!initialized) {
        synchronized (SparkCubingByLayer.class) {
            if (!initialized) {
                init();
                initialized = true;
            }
        }
    }
    Object[] result = new Object[measureNum];
    aggregators.aggregate(input1, input2, result, needAggr);
    return result;
}
}
// Merge all heap records that share the smallest key into a single aggregated record.
reuseAggrs.reset();
reuseAggrs.aggregate(metrics);
do {
    ResultMergeSlot slot = heap.poll();
    open.add(slot);
    // Re-read metrics from the polled slot and fold them into the running aggregation.
    metrics = getMetricsValues(slot.currentRecord);
    reuseAggrs.aggregate(metrics);
} while (smallest.isSameKey(heap.peek())); // NOTE(review): heap.peek() can be null once the heap drains — assumes isSameKey tolerates null; verify
// Write the merged measure states back onto the smallest slot's record.
reuseAggrs.collectStates(metrics);
setMetricsValues(smallest.currentRecord, metrics);
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    // Resolve the cube whose measures this reducer aggregates.
    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();

    // Pre-allocate codec, aggregators and reusable buffers sized to the measure count.
    List<MeasureDesc> measuresDescs = cubeDesc.getMeasures();
    codec = new BufferedMeasureCodec(measuresDescs);
    aggs = new MeasureAggregators(measuresDescs);
    input = new Object[measuresDescs.size()];
    result = new Object[measuresDescs.size()];
    outputKey = new Text();
    outputValue = new Text();
}
/**
 * Aggregates two measure tuples into a new array, lazily initializing the
 * shared aggregator state on first use (executor-side lazy init).
 *
 * Fix: replaced the non-idiomatic {@code initialized == false} comparisons
 * with {@code !initialized}.
 *
 * NOTE(review): double-checked locking is only safe if {@code initialized} is
 * declared volatile — the field declaration is outside this view; confirm.
 */
@Override
public Object[] call(Object[] input1, Object[] input2) throws Exception {
    if (!initialized) {
        synchronized (SparkCubingByLayer.class) {
            if (!initialized) {
                init();
                initialized = true;
            }
        }
    }
    Object[] result = new Object[measureNum];
    aggregators.aggregate(input1, input2, result);
    return result;
}
}
// Merge all heap records that share the smallest key into a single aggregated record.
reuseAggrs.reset();
reuseAggrs.aggregate(metrics);
do {
    MergeSlot slot = heap.poll();
    open.add(slot);
    // Re-read metrics from the polled slot and fold them into the running aggregation.
    metrics = getMetricsValues(slot.currentRecord);
    reuseAggrs.aggregate(metrics);
} while (smallest.isSameKey(heap.peek())); // NOTE(review): heap.peek() can be null once the heap drains — assumes isSameKey tolerates null; verify
// Write the merged measure states back onto the smallest slot's record.
reuseAggrs.collectStates(metrics);
setMetricsValues(smallest.currentRecord, metrics);
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT); // only used in Build job, not in Merge job cuboidLevel = context.getConfiguration().getInt(BatchConstants.CFG_CUBE_CUBOID_LEVEL, 0); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor(); measuresDescs = cubeDesc.getMeasures(); codec = new BufferedMeasureCodec(measuresDescs); aggs = new MeasureAggregators(measuresDescs); input = new Object[measuresDescs.size()]; result = new Object[measuresDescs.size()]; List<Integer> needAggMeasuresList = Lists.newArrayList(); for (int i = 0; i < measuresDescs.size(); i++) { if (cuboidLevel == 0) { needAggMeasuresList.add(i); } else { if (!measuresDescs.get(i).getFunction().getMeasureType().onlyAggrInBaseCuboid()) { needAggMeasuresList.add(i); } } } needAggrMeasures = new int[needAggMeasuresList.size()]; for (int i = 0; i < needAggMeasuresList.size(); i++) { needAggrMeasures[i] = needAggMeasuresList.get(i); } }
@Override
public Object[] call(Object[] input1, Object[] input2) throws Exception {
    // Fold the two measure tuples into a freshly allocated result array.
    final Object[] merged = new Object[input1.length];
    aggregators.aggregate(input1, input2, merged);
    return merged;
}
};
@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    // Aggregate every encoded measure tuple sharing this key, then emit one merged tuple.
    aggs.reset();
    for (Text rawValue : values) {
        // Periodic progress logging; vcounter counts values seen, not key/value pairs.
        if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
            logger.info("Handling value with ordinal (This is not KV number!): " + vcounter);
        }
        codec.decode(ByteBuffer.wrap(rawValue.getBytes(), 0, rawValue.getLength()), input);
        aggs.aggregate(input, needAggrMeasures);
    }
    aggs.collectStates(result);
    // Re-encode the merged measure states and write them out under the original key.
    ByteBuffer encodedBuf = codec.encode(result);
    outputValue.set(encodedBuf.array(), 0, encodedBuf.position());
    context.write(key, outputValue);
}
}
public void init() {
    KylinConfig kylinConf = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    // Pin the config to the current thread while reading cube metadata; the
    // try-with-resources restores the previous thread-local config on exit.
    try (KylinConfig.SetAndUnsetThreadLocalConfig ignored = KylinConfig
            .setAndUnsetThreadLocalConfig(kylinConf)) {
        CubeInstance cube = CubeManager.getInstance(kylinConf).getCube(cubeName);
        cubeDesc = cube.getDescriptor();
        measureNum = cubeDesc.getMeasures().size();
        aggregators = new MeasureAggregators(cubeDesc.getMeasures());
    }
}
private void internalAggregate() {
    // Pop the smallest heap entry, fold that dump's current values into the
    // running aggregation, then refill the heap from the same dump.
    Entry<byte[], Integer> head = minHeap.poll();
    Integer dumpIndex = head.getValue();
    resultAggrs.aggregate(dumpCurrentValues.get(dumpIndex));
    enqueueFromDump(dumpIndex);
}