Group.<OuterAggregate>globally() .aggregateFields(field1Selector, Sum.ofLongs(), "field1_sum") .aggregateFields(field3Selector, Sum.ofIntegers(), "field3_sum") .aggregateFields(field1Selector, Top.largestLongsFn(1), "field1_top")); Schema aggregateSchema = Schema.builder()
/**
 * Starts an aggregation that feeds several input fields into a single combine function.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * <p>The type of the output field is inferred from the supplied combine function. Java's type
 * erasure sometimes makes that inference impossible; when it does, use the overload that lets
 * the output field type be set explicitly.
 *
 * @param fieldsToAggregate descriptor of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputFieldName name given to the aggregate in the output schema
 * @return a {@code CombineFieldsGlobally} wrapping the result of registering this aggregation on
 *     a freshly created {@code SchemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFields(
    FieldAccessDescriptor fieldsToAggregate,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  return new CombineFieldsGlobally<InputT>(
      SchemaAggregateFn.<InputT>create().aggregateFields(fieldsToAggregate, fn, outputFieldName));
}
/**
 * Checks that a global grouping built from three {@code aggregateField} calls yields exactly one
 * row whose fields are, in order, the sum of field1, the sum of field3 and the largest field1.
 */
@Test
@Category(NeedsRunner.class)
public void testGloballyWithSchemaAggregateFn() {
  // Expected output: one field per aggregateField call below, in call order.
  Schema aggregateSchema =
      Schema.builder()
          .addInt64Field("field1_sum")
          .addInt32Field("field3_sum")
          .addArrayField("field1_top", FieldType.INT64)
          .build();
  // Presumably 10 = 1+2+3+4 and 14 = 2+3+4+5, assuming the AggregatePojos constructor takes
  // (field1, field2, field3) -- confirm against the POJO definition.
  Row expectedRow = Row.withSchema(aggregateSchema).addValues(10L, 14).addArray(4L).build();

  Collection<AggregatePojos> elements =
      ImmutableList.of(
          new AggregatePojos(1, 1, 2),
          new AggregatePojos(2, 1, 3),
          new AggregatePojos(3, 2, 4),
          new AggregatePojos(4, 2, 5));
  PCollection<AggregatePojos> input = pipeline.apply(Create.of(elements));

  PCollection<Row> aggregate =
      input.apply(
          Group.<AggregatePojos>globally()
              .aggregateField("field1", Sum.ofLongs(), "field1_sum")
              .aggregateField("field3", Sum.ofIntegers(), "field3_sum")
              .aggregateField("field1", Top.largestLongsFn(1), "field1_top"));

  PAssert.that(aggregate).containsInAnyOrder(expectedRow);
  pipeline.run();
}
/**
 * Registers an aggregation over several input fields that are identified by name.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * <p>The type of the output field is inferred from the supplied combine function. Java's type
 * erasure sometimes makes that inference impossible; when it does, use the overload that lets
 * the output field type be set explicitly.
 *
 * @param inputFieldNames names of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputFieldName name given to the aggregate in the output schema
 * @return whatever the {@code FieldAccessDescriptor}-based overload returns for these fields
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFields(
    List<String> inputFieldNames,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  // Translate the names into a descriptor and defer to the descriptor-based overload.
  FieldAccessDescriptor fieldsToAggregate = FieldAccessDescriptor.withFieldNames(inputFieldNames);
  return aggregateFields(fieldsToAggregate, fn, outputFieldName);
}
/**
 * Starts an aggregation over one named input field, with the output field given explicitly.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * @param inputFieldName name of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputField field that holds the aggregate in the output schema
 * @return a {@code CombineFieldsGlobally} wrapping the result of registering this aggregation on
 *     a freshly created {@code SchemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    String inputFieldName,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldNames(inputFieldName);
  return new CombineFieldsGlobally<InputT>(
      SchemaAggregateFn.<InputT>create().aggregateFields(singleField, fn, outputField));
}
/**
 * Starts an aggregation over one named input field, naming the output field.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * @param inputFieldName name of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputFieldName name given to the aggregate in the output schema
 * @return a {@code CombineFieldsGlobally} wrapping the result of registering this aggregation on
 *     a freshly created {@code SchemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    String inputFieldName,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldNames(inputFieldName);
  return new CombineFieldsGlobally<InputT>(
      SchemaAggregateFn.<InputT>create().aggregateFields(singleField, fn, outputFieldName));
}
/**
 * Adds an aggregation over one named input field to the aggregations collected so far.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * <p>The type of the output field is inferred from the supplied combine function. Java's type
 * erasure sometimes makes that inference impossible; when it does, use the overload that lets
 * the output field type be set explicitly.
 *
 * @param inputFieldName name of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputFieldName name given to the aggregate in the output schema
 * @return a new {@code CombineFieldsGlobally} wrapping the result of registering this
 *     aggregation on this object's {@code schemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    String inputFieldName,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldNames(inputFieldName);
  return new CombineFieldsGlobally<InputT>(
      schemaAggregateFn.aggregateFields(singleField, fn, outputFieldName));
}
/**
 * Adds an aggregation over several input fields to the aggregations collected so far.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * <p>The type of the output field is inferred from the supplied combine function. Java's type
 * erasure sometimes makes that inference impossible; when it does, use the overload that lets
 * the output field type be set explicitly.
 *
 * @param fieldAccessDescriptor descriptor of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputFieldName name given to the aggregate in the output schema
 * @return a new {@code CombineFieldsGlobally} wrapping the result of registering this
 *     aggregation on this object's {@code schemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFields(
    FieldAccessDescriptor fieldAccessDescriptor,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  return new CombineFieldsGlobally<InputT>(
      schemaAggregateFn.aggregateFields(fieldAccessDescriptor, fn, outputFieldName));
}
/**
 * Adds an aggregation over one named input field, with the output field given explicitly, to
 * the aggregations collected so far.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * @param inputFieldName name of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputField field that holds the aggregate in the output schema
 * @return a new {@code CombineFieldsGlobally} wrapping the result of registering this
 *     aggregation on this object's {@code schemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    String inputFieldName,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldNames(inputFieldName);
  return new CombineFieldsGlobally<InputT>(
      schemaAggregateFn.aggregateFields(singleField, fn, outputField));
}
/**
 * The same as {@link #aggregateField} but using field id.
 *
 * @param inputFieldId id of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputField field that holds the aggregate in the output schema
 * @return a {@code CombineFieldsGlobally} wrapping the result of registering this aggregation on
 *     a freshly created {@code SchemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    int inputFieldId,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  return new CombineFieldsGlobally<>(
      SchemaAggregateFn.<InputT>create()
          .aggregateFields(FieldAccessDescriptor.withFieldIds(inputFieldId), fn, outputField));
}
/**
 * Variant of {@link #aggregateField} that selects the input field by id rather than by name.
 *
 * @param inputFieldId id of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputFieldName name given to the aggregate in the output schema
 * @return a {@code CombineFieldsGlobally} wrapping the result of registering this aggregation on
 *     a freshly created {@code SchemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    int inputFieldId,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldIds(inputFieldId);
  return new CombineFieldsGlobally<InputT>(
      SchemaAggregateFn.<InputT>create().aggregateFields(singleField, fn, outputFieldName));
}
/**
 * Registers an aggregation over several input fields that are identified by field id, with the
 * output field given explicitly.
 *
 * @param inputFieldIds ids of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputField field that holds the aggregate in the output schema
 * @return whatever the {@code FieldAccessDescriptor}-based {@code aggregateFields} overload
 *     returns for these fields
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFieldsById(
    List<Integer> inputFieldIds,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  // Translate the ids into a descriptor and defer to the descriptor-based overload.
  FieldAccessDescriptor fieldsToAggregate = FieldAccessDescriptor.withFieldIds(inputFieldIds);
  return aggregateFields(fieldsToAggregate, fn, outputField);
}
/**
 * Adds an aggregation over several input fields, with the output field given explicitly, to the
 * aggregations collected so far.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * @param fieldAccessDescriptor descriptor of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputField field that holds the aggregate in the output schema
 * @return a new {@code CombineFieldsGlobally} wrapping the result of registering this
 *     aggregation on this object's {@code schemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFields(
    FieldAccessDescriptor fieldAccessDescriptor,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  return new CombineFieldsGlobally<InputT>(
      schemaAggregateFn.aggregateFields(fieldAccessDescriptor, fn, outputField));
}
/**
 * Registers an aggregation over several input fields that are identified by name, with the
 * output field given explicitly.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * @param inputFieldNames names of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputField field that holds the aggregate in the output schema
 * @return whatever the {@code FieldAccessDescriptor}-based overload returns for these fields
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFields(
    List<String> inputFieldNames,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  // Translate the names into a descriptor and defer to the descriptor-based overload.
  FieldAccessDescriptor fieldsToAggregate = FieldAccessDescriptor.withFieldNames(inputFieldNames);
  return aggregateFields(fieldsToAggregate, fn, outputField);
}
/**
 * Starts an aggregation that feeds several input fields into a single combine function, with
 * the output field given explicitly.
 *
 * <p>The output schema is the union of every aggregateField and aggregateFields call made while
 * building the transform.
 *
 * @param fieldsToAggregate descriptor of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputField field that holds the aggregate in the output schema
 * @return a {@code CombineFieldsGlobally} wrapping the result of registering this aggregation on
 *     a freshly created {@code SchemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFields(
    FieldAccessDescriptor fieldsToAggregate,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  return new CombineFieldsGlobally<InputT>(
      SchemaAggregateFn.<InputT>create().aggregateFields(fieldsToAggregate, fn, outputField));
}
/**
 * Adds an aggregation over one input field, selected by field id and with the output field given
 * explicitly, to the aggregations collected so far.
 *
 * @param inputFieldId id of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputField field that holds the aggregate in the output schema
 * @return a new {@code CombineFieldsGlobally} wrapping the result of registering this
 *     aggregation on this object's {@code schemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    int inputFieldId,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    Field outputField) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldIds(inputFieldId);
  return new CombineFieldsGlobally<InputT>(
      schemaAggregateFn.aggregateFields(singleField, fn, outputField));
}
/**
 * Adds an aggregation over one input field, selected by field id, to the aggregations collected
 * so far.
 *
 * @param inputFieldId id of the input field passed to the combine function
 * @param fn combine function applied to the selected field
 * @param outputFieldName name given to the aggregate in the output schema
 * @return a new {@code CombineFieldsGlobally} wrapping the result of registering this
 *     aggregation on this object's {@code schemaAggregateFn}
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateField(
    int inputFieldId,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  FieldAccessDescriptor singleField = FieldAccessDescriptor.withFieldIds(inputFieldId);
  return new CombineFieldsGlobally<InputT>(
      schemaAggregateFn.aggregateFields(singleField, fn, outputFieldName));
}
/**
 * Registers an aggregation over several input fields that are identified by field id.
 *
 * @param inputFieldIds ids of the input fields passed to the combine function
 * @param fn combine function applied to the selected fields
 * @param outputFieldName name given to the aggregate in the output schema
 * @return whatever the {@code FieldAccessDescriptor}-based {@code aggregateFields} overload
 *     returns for these fields
 */
public <CombineInputT, AccumT, CombineOutputT> CombineFieldsGlobally<InputT> aggregateFieldsById(
    List<Integer> inputFieldIds,
    CombineFn<CombineInputT, AccumT, CombineOutputT> fn,
    String outputFieldName) {
  // Translate the ids into a descriptor and defer to the descriptor-based overload.
  FieldAccessDescriptor fieldsToAggregate = FieldAccessDescriptor.withFieldIds(inputFieldIds);
  return aggregateFields(fieldsToAggregate, fn, outputFieldName);
}