public static final Pair<Bucket, byte[]> fromGroupKey(byte[] keyBytes)
{
  ByteBuffer buf = ByteBuffer.wrap(keyBytes);
  Bucket bucket = new Bucket(buf.getInt(), DateTimes.utc(buf.getLong()), buf.getInt());
  byte[] bytesLeft = new byte[buf.remaining()];
  buf.get(bytesLeft);
  return Pair.of(bucket, bytesLeft);
}
@Override
protected void reduce(final BytesWritable key, Iterable<BytesWritable> values, final Context context)
    throws IOException, InterruptedException
{
  Iterator<BytesWritable> iter = values.iterator();
  BytesWritable first = iter.next();

  if (iter.hasNext()) {
    LinkedHashSet<String> dimOrder = new LinkedHashSet<>();
    SortableBytes keyBytes = SortableBytes.fromBytesWritable(key);
    Bucket bucket = Bucket.fromGroupKey(keyBytes.getGroupKey()).lhs;
    IncrementalIndex index = makeIncrementalIndex(bucket, combiningAggs, config, null, null);
    index.add(InputRowSerde.fromBytes(typeHelperMap, first.getBytes(), aggregators));

    while (iter.hasNext()) {
      context.progress();
      InputRow value = InputRowSerde.fromBytes(typeHelperMap, iter.next().getBytes(), aggregators);

      if (!index.canAppendRow()) {
        // The in-memory index is full: flush it to the context and start a fresh one,
        // carrying over the dimension order and column capabilities seen so far.
        dimOrder.addAll(index.getDimensionOrder());
        log.info("current index full due to [%s]. creating new index.", index.getOutOfRowsReason());
        flushIndexToContextAndClose(key, index, context);
        index = makeIncrementalIndex(bucket, combiningAggs, config, dimOrder, index.getColumnCapabilities());
      }

      index.add(value);
    }

    flushIndexToContextAndClose(key, index, context);
  } else {
    // Only a single value for this key: nothing to combine, pass it through unchanged.
    context.write(key, first);
  }
}
public byte[] toGroupKey(byte[]... parts)
{
  ByteBuffer buf = ByteBuffer.allocate(PREAMBLE_BYTES + sizes(parts));

  buf.putInt(shardNum);
  buf.putLong(time.getMillis());
  buf.putInt(partitionNum);
  for (byte[] part : parts) {
    buf.put(part);
  }

  return buf.array();
}
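// Taken together, toGroupKey(...) and fromGroupKey(...) are a symmetric pair: a fixed preamble of
// shardNum (int), time in millis (long), and partitionNum (int), followed by any trailing payload
// bytes, which fromGroupKey hands back untouched.
//
// A minimal round-trip sketch, not taken from the source; it assumes only the
// Bucket(int, DateTime, int) constructor visible in fromGroupKey above and a plain JDK byte payload.
Bucket original = new Bucket(3, DateTimes.utc(1609459200000L), 0);
byte[] groupKey = original.toGroupKey("host=web01".getBytes(StandardCharsets.UTF_8));

Pair<Bucket, byte[]> roundTripped = Bucket.fromGroupKey(groupKey);
// roundTripped.lhs carries the same shardNum/time/partitionNum as `original`;
// roundTripped.rhs is exactly the payload bytes that were appended after the preamble.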
bucket.get().toGroupKey(),
public Bucket apply(HadoopyShardSpec input)
{
  return new Bucket(input.getShardNum(), bucketTime, i++);
}
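// The apply(...) above reads like the body of a callback that numbers the shard specs of a single
// time chunk. A hedged reconstruction of that intent as a plain loop; the names shardSpecsForChunk
// and bucketTime are assumptions for illustration, not taken from the source.
List<Bucket> buckets = new ArrayList<>();
int i = 0;
for (HadoopyShardSpec spec : shardSpecsForChunk) {
  // Each shard spec of the chunk becomes a Bucket sharing the chunk's start time,
  // with a monotonically increasing partition counter within that chunk.
  buckets.add(new Bucket(spec.getShardNum(), bucketTime, i++));
}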
Bucket bucket = Bucket.fromGroupKey(keyBytes.getGroupKey()).lhs;
/**
 * Get the proper bucket for some input row.
 *
 * @param inputRow an InputRow
 *
 * @return the Bucket that this row belongs to
 */
public Optional<Bucket> getBucket(InputRow inputRow)
{
  final Optional<Interval> timeBucket = schema.getDataSchema().getGranularitySpec().bucketInterval(
      DateTimes.utc(inputRow.getTimestampFromEpoch())
  );
  if (!timeBucket.isPresent()) {
    return Optional.absent();
  }
  final DateTime bucketStart = timeBucket.get().getStart();
  final ShardSpec actualSpec = shardSpecLookups.get(bucketStart.getMillis())
                                               .getShardSpec(
                                                   rollupGran.bucketStart(inputRow.getTimestamp()).getMillis(),
                                                   inputRow
                                               );
  final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(bucketStart.getMillis()).get(actualSpec);

  return Optional.of(
      new Bucket(
          hadoopyShardSpec.getShardNum(),
          bucketStart,
          actualSpec.getPartitionNum()
      )
  );
}
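// A hedged usage sketch tying getBucket(...) to the group key machinery above; `config` stands for
// an instance of the class defining getBucket, and the payload passed to toGroupKey is a
// hypothetical stand-in (the real call site's arguments are not shown in this excerpt).
Optional<Bucket> bucket = config.getBucket(inputRow);
if (bucket.isPresent()) {
  byte[] groupKey = bucket.get().toGroupKey(
      serializedRowBytes // hypothetical payload; not taken from the source
  );
  // groupKey routes this row to the reducer shown earlier, which rebuilds the Bucket via
  // Bucket.fromGroupKey(...).lhs before populating its IncrementalIndex.
}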