// Resets this strategy: discards all accumulated metric state by swapping in a fresh
// GroupData built from the same configured group operations.
@Override public void reset() { data = new GroupData(operations); } }
/**
 * Consumes a single record, folding it into the accumulated group metrics.
 */
@Override
public void consume(BulletRecord record) {
    // Parameter renamed so it no longer shadows the data field.
    data.consume(record);
}
/**
 * Returns the aggregated metrics as a single-record result list.
 */
@Override
public List<BulletRecord> getRecords() {
    List<BulletRecord> records = new ArrayList<>();
    records.add(data.getMetricsAsBulletRecord(provider));
    return records;
}
// Builds a serialized GroupData that has counted exactly count empty records under countField.
private static byte[] getGroupDataWithCount(String countField, int count) {
    GroupOperation countOperation = new GroupOperation(COUNT, null, countField);
    GroupData groupData = new GroupData(new HashSet<>(singletonList(countOperation)));
    for (int i = 0; i < count; i++) {
        groupData.consume(RecordBox.get().getRecord());
    }
    return SerializerDeserializer.toBytes(groupData);
}
private void addToRecord(Map.Entry<GroupOperation, Number> metric, BulletRecord record) { GroupOperation operation = metric.getKey(); Number value = metric.getValue(); switch (operation.getType()) { case COUNT: record.setLong(getResultName(operation), value == null ? 0 : value.longValue()); break; case AVG: record.setDouble(getResultName(operation), calculateAvg(value, operation.getField())); break; case COUNT_FIELD: // Internal use only for AVG. Not exposed. break; case MIN: case MAX: case SUM: record.setDouble(getResultName(operation), value == null ? null : value.doubleValue()); break; } }
/**
 * Merges a serialized GroupData into the accumulated state.
 */
@Override
public void combine(byte[] serialized) {
    data.combine(serialized);
}
/**
 * Constructor that initializes the GroupData with a {@link Set} of {@link GroupOperation} and a {@link Map} of
 * Strings that represent the group fields.
 *
 * @param groupFields The mappings of field names to their values that represent this group.
 * @param operations the non-null operations that this will compute metrics for.
 */
public GroupData(Map<String, String> groupFields, Set<GroupOperation> operations) {
    // The two assignments are independent; metrics start at their initial (empty) values.
    this.metrics = makeInitialMetrics(operations);
    this.groupFields = groupFields;
}
/**
 * Gets the metrics and the group values stored as a {@link BulletRecord}, with no renaming
 * of the group fields.
 *
 * @param provider A BulletRecordProvider to generate BulletRecords.
 * @return A non-null {@link BulletRecord} containing the data stored in this object.
 */
public BulletRecord getAsBulletRecord(BulletRecordProvider provider) {
    Map<String, String> noRenaming = Collections.emptyMap();
    return getAsBulletRecord(noRenaming, provider);
}
// Merges a single metric entry with the matching metric from another GroupData.
private void combine(Map.Entry<GroupOperation, Number> metric, GroupData otherData) {
    GroupOperation operation = metric.getKey();
    Number otherValue = otherData.metrics.get(operation);
    GroupOperation merger;
    switch (operation.getType()) {
        case MIN:
            merger = GroupOperation.MIN;
            break;
        case MAX:
            merger = GroupOperation.MAX;
            break;
        case SUM:
        case AVG:
            // Averages are merged additively; the final division happens at result time.
            merger = GroupOperation.SUM;
            break;
        case COUNT:
        case COUNT_FIELD:
            merger = GroupOperation.COUNT;
            break;
        default:
            // Unknown type: nothing to merge (matches the original's silent no-op).
            return;
    }
    updateMetric(otherValue, metric, merger);
}
/**
 * Gets the metrics stored for the group as a {@link BulletRecord}.
 *
 * @param provider A BulletRecordProvider to generate BulletRecords.
 * @return A non-null {@link BulletRecord} containing the data stored in this object.
 */
public BulletRecord getMetricsAsBulletRecord(BulletRecordProvider provider) {
    BulletRecord record = provider.getInstance();
    // Plain loop instead of the redundant entrySet().stream().forEach(): this is a
    // side-effecting iteration, not a stream transformation.
    for (Map.Entry<GroupOperation, Number> entry : metrics.entrySet()) {
        addToRecord(entry, record);
    }
    return record;
}
/**
 * Merge a GroupData into this. For all GroupOperations present, their corresponding values will be
 * merged according to their respective additive operation.
 *
 * @param otherData The other GroupData to merge.
 */
public void combine(GroupData otherData) {
    // Plain loop instead of the redundant entrySet().stream().forEach(): this is a
    // side-effecting iteration, not a stream transformation. Entry.setValue during
    // iteration is permitted, so merging in place is safe.
    for (Map.Entry<GroupOperation, Number> entry : metrics.entrySet()) {
        combine(entry, otherData);
    }
}
/**
 * Constructor that requires an {@link Aggregation} and a {@link BulletConfig} configuration.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public GroupBy(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);
    operations = GroupOperation.getOperations(aggregation.getAttributes());
    // The container caches the initial metric map so each group starts from the same shape.
    container = new CachingGroupData(null, GroupData.makeInitialMetrics(operations));
    ResizeFactor resizeFactor = getResizeFactor(config, BulletConfig.GROUP_AGGREGATION_SKETCH_RESIZE_FACTOR);
    float samplingProbability = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_SAMPLING, Float.class);
    // Default at 512 gives a 13.27% error rate at 99.73% confidence (3 SD). Irrelevant since we are using this to
    // mostly cap the number of groups. You can use the Sketch theta to extrapolate the aggregation for all the data.
    int nominalEntries = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_ENTRIES, Integer.class);
    int maximumSize = config.getAs(BulletConfig.GROUP_AGGREGATION_MAX_SIZE, Integer.class);
    int size = Math.min(aggregation.getSize(), maximumSize);
    sketch = new TupleSketch(resizeFactor, samplingProbability, nominalEntries, size, config.getBulletRecordProvider());
}
/**
 * Merges the sketch and returns up to maxSize group records.
 *
 * @return A non-null {@link List} of the aggregated group records.
 */
@Override
public List<BulletRecord> getRecords() {
    merge();
    // Renamed the local from "result": it shadowed the result field (the sketch result),
    // which forced the confusing this.result qualification below.
    List<BulletRecord> records = new ArrayList<>();
    SketchIterator<GroupDataSummary> iterator = result.iterator();
    // iterator.next() is evaluated first, matching the original advance-then-check order.
    for (int seen = 0; iterator.next() && seen < maxSize; seen++) {
        GroupData data = iterator.getSummary().getData();
        records.add(data.getAsBulletRecord(provider));
    }
    return records;
}
// Extracts the value this metric needs from the record and applies the metric's operator.
private void consume(Map.Entry<GroupOperation, Number> metric, BulletRecord data) {
    GroupOperation operation = metric.getKey();
    GroupOperation.GroupOperationType type = operation.getType();
    Number value;
    switch (type) {
        case MIN:
        case MAX:
        case SUM:
        case AVG:
            value = extractFieldAsNumber(operation.getField(), data);
            break;
        case COUNT_FIELD:
            // Contributes 1 only when the field is present; null skips the update.
            value = extractFieldAsNumber(operation.getField(), data) != null ? 1L : null;
            break;
        case COUNT:
        default:
            // COUNT just tallies the record itself.
            value = 1L;
            break;
    }
    updateMetric(value, metric, GroupOperation.OPERATORS.get(type));
}
/**
 * Consumes the given {@link BulletRecord} and computes group operation metrics.
 *
 * @param data The record to compute metrics for.
 */
public void consume(BulletRecord data) {
    // Plain loop instead of the redundant entrySet().stream().forEach(): this is a
    // side-effecting iteration, not a stream transformation.
    for (Map.Entry<GroupOperation, Number> entry : metrics.entrySet()) {
        consume(entry, data);
    }
}
/**
 * Gets the metrics and the group values stored as a {@link BulletRecord}.
 *
 * @param mapping An non-null new name mapping for the names of the group fields.
 * @param provider A BulletRecordProvider to generate BulletRecords.
 * @return A non-null {@link BulletRecord} containing the data stored in this object.
 */
public BulletRecord getAsBulletRecord(Map<String, String> mapping, BulletRecordProvider provider) {
    BulletRecord record = getMetricsAsBulletRecord(provider);
    groupFields.forEach((field, groupValue) -> {
        // An absent or empty mapping entry keeps the original field name.
        String mapped = mapping.get(field);
        String name = Utilities.isEmpty(mapped) ? field : mapped;
        record.setString(name, groupValue);
    });
    return record;
}
/**
 * Constructor that requires an {@link Aggregation}.
 *
 * @param aggregation The {@link Aggregation} that specifies how and what this will compute.
 * @param config The BulletConfig.
 */
public GroupAll(Aggregation aggregation, BulletConfig config) {
    // GroupOperations is all we care about - size etc. are meaningless for Group All since it's a single result
    provider = config.getBulletRecordProvider();
    operations = GroupOperation.getOperations(aggregation.getAttributes());
    data = new GroupData(operations);
}
/**
 * Merges the serialized form of a GroupData into this. For all GroupOperations present, their corresponding
 * values will be merged according to their respective additive operation.
 *
 * @param serializedGroupData the serialized bytes of a GroupData.
 */
public void combine(byte[] serializedGroupData) {
    GroupData other = SerializerDeserializer.fromBytes(serializedGroupData);
    if (other != null) {
        combine(other);
    } else {
        // Deserialization failed; drop this partial and keep the current state intact.
        log.error("Could not create a GroupData. Skipping...");
    }
}
/**
 * Folds the given {@link CachingGroupData} into this summary's state, lazily copying the
 * group's metric shape on the first update.
 *
 * @param value The group data whose cached record should be consumed.
 */
@Override
public void update(CachingGroupData value) {
    if (!initialized) {
        // This only needs to happen once per summary (i.e. once per group).
        data = value.partialCopy();
        initialized = true;
    }
    data.consume(value.getCachedRecord());
}
// Test helper: true when the GroupData's metrics render to the expected record.
private boolean isEqual(GroupData actual, BulletRecord expected) {
    BulletRecord actualRecord = actual.getMetricsAsBulletRecord(provider);
    return actualRecord.equals(expected);
}