/**
 * Builds a single string for the given fields by looking up each field name in the mapping and
 * passing the resulting stream of values to {@code composeField}.
 *
 * @param fields The field names to look up, in the order their values should be streamed.
 * @param mapping The map used to resolve each field name; each entry of the stream is whatever
 *                {@code mapping.get} returns for that name.
 * @return The string produced by {@code composeField} for the streamed values.
 */
private String getFieldsAsString(List<String> fields, Map<String, String> mapping) {
    return composeField(fields.stream().map(mapping::get));
}
// Closes an enclosing scope whose opening lies outside this view.
}
@Override public void consume(BulletRecord data) { Map<String, String> fieldToValues = getFields(data); // More optimal than calling composeFields String key = getFieldsAsString(fields, fieldToValues); // Set the record and the group values into the container. The metrics are already initialized. container.setCachedRecord(data); container.setGroupFields(fieldToValues); sketch.update(key, container); }
/**
 * Returns the records produced by the superclass after passing them through
 * {@code renameFields}.
 *
 * @return The list obtained from {@code super.getRecords()}, after {@code renameFields} has been
 *         applied to it.
 */
@Override
public List<BulletRecord> getRecords() {
    List<BulletRecord> records = super.getRecords();
    renameFields(records);
    return records;
}
/**
 * Creates the strategy from an {@link Aggregation} and a {@link BulletConfig}.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public GroupBy(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);

    // Group operations and the reusable container seeded with their initial metrics.
    Map<String, Object> aggregationAttributes = aggregation.getAttributes();
    operations = GroupOperation.getOperations(aggregationAttributes);
    Map<GroupOperation, Number> initialMetrics = GroupData.makeInitialMetrics(operations);
    container = new CachingGroupData(null, initialMetrics);

    // Sketch tuning read from the config.
    ResizeFactor sketchResizeFactor = getResizeFactor(config, BulletConfig.GROUP_AGGREGATION_SKETCH_RESIZE_FACTOR);
    float sketchSampling = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_SAMPLING, Float.class);

    // Default at 512 gives a 13.27% error rate at 99.73% confidence (3 SD). Irrelevant since we are using this to
    // mostly cap the number of groups. You can use the Sketch theta to extrapolate the aggregation for all the data.
    int sketchEntries = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_ENTRIES, Integer.class);

    // The requested size is capped by the configured maximum.
    int configuredMaximum = config.getAs(BulletConfig.GROUP_AGGREGATION_MAX_SIZE, Integer.class);
    int cappedSize = Math.min(aggregation.getSize(), configuredMaximum);

    sketch = new TupleSketch(sketchResizeFactor, sketchSampling, sketchEntries, cappedSize,
                             config.getBulletRecordProvider());
}
/** * Returns a new {@link Strategy} instance that can handle this aggregation. * * @param aggregation The non-null, initialized {@link Aggregation} instance. * @param config The {@link BulletConfig} containing configuration for the strategy. * * @return The created instance of a strategy that can implement the Aggregation. */ public static Strategy findStrategy(Aggregation aggregation, BulletConfig config) { // Guaranteed to be present. switch (aggregation.getType()) { case COUNT_DISTINCT: return new CountDistinct(aggregation, config); case DISTRIBUTION: return new Distribution(aggregation, config); case RAW: return new Raw(aggregation, config); case TOP_K: return new TopK(aggregation, config); } // If we have any fields -> GroupBy return Utilities.isEmpty(aggregation.getFields()) ? new GroupAll(aggregation, config) : new GroupBy(aggregation, config); } }
/**
 * Returns the result produced by the superclass after passing its records through
 * {@code renameFields}.
 *
 * @return The {@link Clip} obtained from {@code super.getResult()}, after {@code renameFields}
 *         has been applied to its records.
 */
@Override
public Clip getResult() {
    Clip clip = super.getResult();
    renameFields(clip.getRecords());
    return clip;
}