/**
 * Creates a GroupData for the group described by the given fields, with its metrics set to the initial
 * values produced for the given {@link Set} of {@link GroupOperation}.
 *
 * @param groupFields The {@link Map} of field names to their values that identifies this group.
 * @param operations The non-null operations that this will compute metrics for.
 */
public GroupData(Map<String, String> groupFields, Set<GroupOperation> operations) {
    // Build the starting value for every requested operation first, then remember the group identity.
    this.metrics = GroupData.makeInitialMetrics(operations);
    this.groupFields = groupFields;
}
/**
 * Builds this strategy from an {@link Aggregation} and a {@link BulletConfig}.
 *
 * The group operations are read from the aggregation's attributes and used to seed the container with
 * initial metrics. The sketch is then configured from the group aggregation settings in the config, with
 * its size being the smaller of the aggregation's size and the configured maximum size.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public GroupBy(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);

    operations = GroupOperation.getOperations(aggregation.getAttributes());
    Map<GroupOperation, Number> initialMetrics = GroupData.makeInitialMetrics(operations);
    // The container has no group fields of its own; it only carries the initial metrics.
    container = new CachingGroupData(null, initialMetrics);

    ResizeFactor factor = getResizeFactor(config, BulletConfig.GROUP_AGGREGATION_SKETCH_RESIZE_FACTOR);
    float probability = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_SAMPLING, Float.class);

    // The default of 512 entries gives a 13.27% error rate at 99.73% confidence (3 SD). That is irrelevant
    // here since the sketch is mostly used to cap the number of groups. The Sketch theta can be used to
    // extrapolate the aggregation for all the data.
    int entries = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_ENTRIES, Integer.class);

    // Never allow more groups than the configured maximum, regardless of what the aggregation asks for.
    int largestAllowed = config.getAs(BulletConfig.GROUP_AGGREGATION_MAX_SIZE, Integer.class);
    int requested = aggregation.getSize();
    int size = requested < largestAllowed ? requested : largestAllowed;

    sketch = new TupleSketch(factor, probability, entries, size, config.getBulletRecordProvider());
}