/**
 * Computes the average for an AVG operation from its running sum and its companion count.
 *
 * @param sum The running sum stored under the AVG operation itself (may be null).
 * @param field The field the AVG operation is over, used to find the companion COUNT_FIELD metric.
 * @return The average as a Double, or null if either the sum or the count is absent.
 */
private Double calculateAvg(Number sum, String field) {
    // AVG keeps its count under a separate, internal COUNT_FIELD operation keyed by the same field.
    Number recordCount = metrics.get(new GroupOperation(COUNT_FIELD, field, null));
    if (sum != null && recordCount != null) {
        return sum.doubleValue() / recordCount.longValue();
    }
    return null;
}
/**
 * Validates the {@link GroupOperation}s this instance was created with by delegating to
 * {@link GroupOperation#checkOperations(java.util.Collection)}.
 *
 * @return An {@link Optional} {@link List} of {@link BulletError} if any operations were invalid.
 */
@Override
public Optional<List<BulletError>> initialize() {
    return GroupOperation.checkOperations(operations);
}
/**
 * Creates a {@link Map} of {@link GroupOperation} to their numeric metric values from a {@link Set} of
 * {@link GroupOperation}.
 *
 * @param operations A set of operations.
 * @return An empty map of metrics that represent these operations.
 */
public static Map<GroupOperation, Number> makeInitialMetrics(Set<GroupOperation> operations) {
    Map<GroupOperation, Number> metrics = new HashMap<>();
    // Initialize with nulls.
    for (GroupOperation operation : operations) {
        metrics.put(operation, null);
        if (operation.getType() == AVG) {
            // For AVG we store an additional COUNT_FIELD operation to store the count (the sum is stored in AVG)
            metrics.put(new GroupOperation(COUNT_FIELD, operation.getField(), null), null);
        }
    }
    return metrics;
}
/**
 * Validates whether the provided {@link Collection} of {@link GroupOperation} is valid.
 *
 * @param operations The non-null operations to validate.
 * @return An {@link Optional} {@link List} of {@link BulletError} if any operations were invalid,
 *         or {@link Optional#empty()} if all were valid.
 */
public static Optional<List<BulletError>> checkOperations(Collection<GroupOperation> operations) {
    List<BulletError> errors = new ArrayList<>();
    for (GroupOperation o : operations) {
        // Every operation type except COUNT requires a field to operate on.
        if (o.getField() == null && o.getType() != GroupOperationType.COUNT) {
            errors.add(makeError(GROUP_OPERATION_REQUIRES_FIELD + o.getType(), OPERATION_REQUIRES_FIELD_RESOLUTION));
        }
    }
    return errors.isEmpty() ? Optional.empty() : Optional.of(errors);
}
/**
 * Returns the name of the result field to use for the given {@link GroupOperation}. If the operation
 * specifies a newName, it will be returned. Otherwise, a composite name containing the type of the
 * operation as well as the field name will be used (if provided).
 *
 * @param operation The operation to get the name for.
 * @return a String representing a name for the result of the operation.
 */
public static String getResultName(GroupOperation operation) {
    String name = operation.getNewName();
    if (name != null) {
        return name;
    }
    GroupOperation.GroupOperationType type = operation.getType();
    String field = operation.getField();
    if (field == null) {
        return type.getName();
    }
    // Reuse the already-extracted field rather than calling getField() a second time.
    return type.getName() + NAME_SEPARATOR + field;
}
/**
 * Constructor that requires an {@link Aggregation}.
 *
 * @param aggregation The {@link Aggregation} that specifies how and what this will compute.
 * @param config The BulletConfig.
 */
public GroupAll(Aggregation aggregation, BulletConfig config) {
    // GroupOperations is all we care about - size etc. are meaningless for Group All since it's a single result
    operations = GroupOperation.getOperations(aggregation.getAttributes());
    data = new GroupData(operations);
    // Unqualified assignment for consistency with the other field assignments above.
    provider = config.getBulletRecordProvider();
}
/** * Parses a {@link Set} of group operations from an Object that is expected to be a {@link List} of {@link Map}. * * @param attributes An Map that contains an object that is the representation of List of group operations. * @return A {@link Set} of GroupOperation or {@link Collections#emptySet()}. */ @SuppressWarnings("unchecked") public static Set<GroupOperation> getOperations(Map<String, Object> attributes) { if (!hasOperations(attributes)) { return Collections.emptySet(); } List<Object> operations = Utilities.getCasted(attributes, OPERATIONS, List.class); if (operations == null) { return Collections.emptySet(); } // Return a list of distinct, non-null, GroupOperations return operations.stream().map(GroupOperation::makeOperation).filter(Objects::nonNull).collect(Collectors.toSet()); }
// Merges the partial value for a single metric from another GroupData into this GroupData's metrics.
private void combine(Map.Entry<GroupOperation, Number> metric, GroupData otherData) {
    GroupOperation operation = metric.getKey();
    Number value = otherData.metrics.get(metric.getKey());
    switch (operation.getType()) {
        case MIN:
            updateMetric(value, metric, GroupOperation.MIN);
            break;
        case MAX:
            updateMetric(value, metric, GroupOperation.MAX);
            break;
        case SUM:
        case AVG:
            // AVG stores its running sum under the AVG operation itself, so partials combine by summing.
            updateMetric(value, metric, GroupOperation.SUM);
            break;
        case COUNT:
        case COUNT_FIELD:
            // Partial counts add up; both count variants combine with the COUNT operator.
            updateMetric(value, metric, GroupOperation.COUNT);
            break;
    }
}
// Folds one BulletRecord into a single metric, extracting the relevant field value where needed.
private void consume(Map.Entry<GroupOperation, Number> metric, BulletRecord data) {
    GroupOperation operation = metric.getKey();
    GroupOperation.GroupOperationType type = operation.getType();
    // COUNT contributes exactly one per record; other types replace this below.
    Number value = 1L;
    switch (type) {
        case MIN:
        case MAX:
        case SUM:
        case AVG:
            value = extractFieldAsNumber(operation.getField(), data);
            break;
        case COUNT_FIELD: {
            // Count only records where the field is actually present (null extraction means absent).
            Number extracted = extractFieldAsNumber(operation.getField(), data);
            value = extracted != null ? 1L : null;
            break;
        }
        case COUNT:
        default:
            break;
    }
    updateMetric(value, metric, GroupOperation.OPERATORS.get(type));
}
/**
 * Constructor that requires an {@link Aggregation} and a {@link BulletConfig} configuration.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public GroupBy(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);
    Map<String, Object> attributes = aggregation.getAttributes();
    operations = GroupOperation.getOperations(attributes);
    Map<GroupOperation, Number> metrics = GroupData.makeInitialMetrics(operations);
    // Scratch container seeded with null metrics — presumably copied per group by the sketch; confirm in TupleSketch.
    container = new CachingGroupData(null, metrics);
    ResizeFactor resizeFactor = getResizeFactor(config, BulletConfig.GROUP_AGGREGATION_SKETCH_RESIZE_FACTOR);
    float samplingProbability = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_SAMPLING, Float.class);
    // Default at 512 gives a 13.27% error rate at 99.73% confidence (3 SD). Irrelevant since we are using this to
    // mostly cap the number of groups. You can use the Sketch theta to extrapolate the aggregation for all the data.
    int nominalEntries = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_ENTRIES, Integer.class);
    int maximumSize = config.getAs(BulletConfig.GROUP_AGGREGATION_MAX_SIZE, Integer.class);
    // The requested aggregation size is capped by the configured maximum.
    int size = Math.min(aggregation.getSize(), maximumSize);
    sketch = new TupleSketch(resizeFactor, samplingProbability, nominalEntries, size, config.getBulletRecordProvider());
}
@SuppressWarnings("unchecked") private static GroupOperation makeOperation(Object object) { try { Map<String, String> data = (Map<String, String>) object; String type = data.get(OPERATION_TYPE); Optional<GroupOperationType> operation = SUPPORTED_GROUP_OPERATIONS.stream().filter(t -> t.isMe(type)).findFirst(); // May or may not be present String field = data.get(OPERATION_FIELD); // May or may not be present String newName = data.get(OPERATION_NEW_NAME); // Unknown GroupOperations are ignored. return operation.isPresent() ? new GroupOperation(operation.get(), field, newName) : null; } catch (ClassCastException | NullPointerException e) { return null; } } }
private void addToRecord(Map.Entry<GroupOperation, Number> metric, BulletRecord record) { GroupOperation operation = metric.getKey(); Number value = metric.getValue(); switch (operation.getType()) { case COUNT: record.setLong(getResultName(operation), value == null ? 0 : value.longValue()); break; case AVG: record.setDouble(getResultName(operation), calculateAvg(value, operation.getField())); break; case COUNT_FIELD: // Internal use only for AVG. Not exposed. break; case MIN: case MAX: case SUM: record.setDouble(getResultName(operation), value == null ? null : value.doubleValue()); break; } }
/**
 * Validates this strategy's group operations. A GROUP aggregation with no operations at all is an error.
 *
 * @return An {@link Optional} {@link List} of {@link BulletError} if the operations were missing or invalid.
 */
@Override
public Optional<List<BulletError>> initialize() {
    return Utilities.isEmpty(operations) ?
           Optional.of(singletonList(GroupOperation.REQUIRES_FIELD_OR_OPERATION_ERROR)) :
           GroupOperation.checkOperations(operations);
}
// Builds the serialized bytes of a GroupData holding a COUNT (named countField) of exactly count records.
private static byte[] getGroupDataWithCount(String countField, int count) {
    GroupData groupData = new GroupData(new HashSet<>(singletonList(new GroupOperation(COUNT, null, countField))));
    for (int i = 0; i < count; i++) {
        groupData.consume(RecordBox.get().getRecord());
    }
    return SerializerDeserializer.toBytes(groupData);
}
singletonList(new GroupOperation(COUNT, null, "cnt"))); Tuple query = TupleUtils.makeIDTuple(TupleClassifier.Type.QUERY_TUPLE, "42", filterQuery, EMPTY);
setup(bolt); List<GroupOperation> operations = asList(new GroupOperation(COUNT, null, "cnt"), new GroupOperation(SUM, "fieldB", "sumB")); String queryString = makeGroupFilterQuery("ts", singletonList("1"), EQUALS, GROUP, entries, operations, Pair.of("fieldA", "A"));
singletonList(new GroupOperation(COUNT, null, "cnt"))); Tuple query = TupleUtils.makeIDTuple(TupleClassifier.Type.QUERY_TUPLE, "42", filterQuery, EMPTY);
@Test
public void testCounting() {
    // Bolt becomes donable after 5 data tuples.
    bolt = new DonableJoinBolt(config, 5, true);
    setup(bolt);
    String filterQuery = makeGroupFilterQuery("timestamp", asList("1", "2"), EQUALS, GROUP, 1,
                                              singletonList(new GroupOperation(COUNT, null, "cnt")));
    Tuple query = TupleUtils.makeIDTuple(TupleClassifier.Type.QUERY_TUPLE, "42", filterQuery, EMPTY);
    bolt.execute(query);
    // Send 5 GroupData with counts 1, 2, 3, 4, 5 to the JoinBolt
    IntStream.range(1, 6).forEach(i -> sendRawByteTuplesTo(bolt, "42", singletonList(getGroupDataWithCount("cnt", i))));
    // 1 + 2 + 3 + 4 + 5
    List<BulletRecord> result = singletonList(RecordBox.get().add("cnt", 15L).getRecord());
    Tuple expected = TupleUtils.makeTuple(TupleClassifier.Type.RESULT_TUPLE, "42", Clip.of(result).asJSON(), COMPLETED);
    Tuple tick = TupleUtils.makeTuple(TupleClassifier.Type.TICK_TUPLE);
    // Should start buffering the query for the query tickout
    bolt.execute(tick);
    // No result may be emitted until the post-finish buffer ticks have elapsed.
    for (int i = 0; i < BulletStormConfig.DEFAULT_JOIN_BOLT_QUERY_POST_FINISH_BUFFER_TICKS - 1; ++i) {
        bolt.execute(tick);
        Assert.assertFalse(wasResultEmittedTo(TopologyConstants.RESULT_STREAM, expected));
    }
    // The final buffered tick releases the joined result and the COMPLETE feedback, exactly once each.
    bolt.execute(tick);
    Assert.assertTrue(wasResultEmittedTo(TopologyConstants.RESULT_STREAM, expected));
    Tuple metadata = TupleUtils.makeTuple(TupleClassifier.Type.FEEDBACK_TUPLE, "42",
                                          new Metadata(Metadata.Signal.COMPLETE, null));
    Assert.assertTrue(wasMetadataEmittedTo(TopologyConstants.FEEDBACK_STREAM, metadata));
    Assert.assertEquals(collector.getAllEmittedTo(TopologyConstants.RESULT_STREAM).count(), 1);
    Assert.assertEquals(collector.getAllEmittedTo(TopologyConstants.FEEDBACK_STREAM).count(), 1);
}
@Test
public void testGroupAllCount() {
    // 15 Records will be consumed
    bolt = ComponentUtils.prepare(new DonableFilterBolt(15, new BulletStormConfig()), collector);
    Tuple query = makeIDTuple(TupleClassifier.Type.QUERY_TUPLE, "42",
                              makeGroupFilterQuery("timestamp", asList("1", "2"), EQUALS, GROUP, 1,
                                                   singletonList(new GroupOperation(COUNT, null, "cnt"))), METADATA);
    bolt.execute(query);
    // 10 records match the filter and should be counted.
    BulletRecord record = RecordBox.get().add("timestamp", "1").getRecord();
    Tuple matching = makeRecordTuple(record);
    IntStream.range(0, 10).forEach(i -> bolt.execute(matching));
    // 5 records (plus one extra below) do not match and must not be counted.
    BulletRecord another = RecordBox.get().getRecord();
    Tuple nonMatching = makeRecordTuple(another);
    IntStream.range(0, 5).forEach(i -> bolt.execute(nonMatching));
    // NOTE(review): this is a 16th record into a bolt donable at 15 — presumably what pushes it past done; confirm DonableFilterBolt semantics.
    bolt.execute(nonMatching);
    // Two to flush bolt
    Tuple tick = TupleUtils.makeTuple(TupleClassifier.Type.TICK_TUPLE);
    bolt.execute(tick);
    bolt.execute(tick);
    Assert.assertEquals(collector.getEmittedCount(), 1);
    GroupData actual = SerializerDeserializer.fromBytes(getRawPayloadOfNthTuple(1));
    // Only the 10 matching records contribute to the count.
    BulletRecord expected = RecordBox.get().add("cnt", 10L).getRecord();
    Assert.assertTrue(isEqual(actual, expected));
}