/**
 * Specifies how to partition elements into groups ("destinations").
 *
 * <p>The plain function is wrapped with {@code fn(...)} and handed to the overload of {@code by}
 * that accepts the wrapped form.
 *
 * @param destinationFn function mapping an element to its destination; must not be null
 * @return the result of the delegated {@code by} call
 * @throws IllegalArgumentException if {@code destinationFn} is null
 */
public Write<DestinationT, UserT> by(SerializableFunction<UserT, DestinationT> destinationFn) {
  // Explicit guard: rejects a missing function with the same message as before.
  if (destinationFn == null) {
    throw new IllegalArgumentException("destinationFn can not be null");
  }
  return by(fn(destinationFn));
}
/**
 * Specifies a custom strategy for generating filenames depending on the destination, similar to
 * {@link #withNaming(FileNaming)}.
 *
 * <p>This can only be used in combination with {@link #writeDynamic()} but not {@link #write()}.
 *
 * <p>The plain function is wrapped with {@code fn(...)} and handed to the overload of {@code
 * withNaming} that accepts the wrapped form.
 *
 * @param namingFn function mapping a destination to its {@link FileNaming}; must not be null
 * @return the result of the delegated {@code withNaming} call
 * @throws IllegalArgumentException if {@code namingFn} is null
 */
public Write<DestinationT, UserT> withNaming(
    SerializableFunction<DestinationT, FileNaming> namingFn) {
  // Explicit guard: rejects a missing function with the same message as before.
  if (namingFn == null) {
    throw new IllegalArgumentException("namingFn can not be null");
  }
  return withNaming(fn(namingFn));
}
/**
 * For a {@code SerializableFunction<InputT, ? extends Iterable<OutputT>>} {@code fn}, returns a
 * {@link PTransform} that applies {@code fn} to every element of the input {@code
 * PCollection<InputT>} and outputs all of the elements to the output {@code
 * PCollection<OutputT>}.
 *
 * <p>Example of use in Java 8:
 *
 * <pre>{@code
 * PCollection<String> words = lines.apply(
 *     FlatMapElements.into(TypeDescriptors.strings())
 *                    .via((String line) -> Arrays.asList(line.split(" "))));
 * }</pre>
 *
 * <p>In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type
 * descriptor need not be provided.
 *
 * @param fn function producing zero or more outputs per input element; it is also passed
 *     unwrapped as the second constructor argument
 * @return a new {@code FlatMapElements} carrying the wrapped function, the input type descriptor
 *     obtained from {@code TypeDescriptors.inputOf(fn)}, and this instance's {@code outputType}
 */
public <NewInputT> FlatMapElements<NewInputT, OutputT> via(
    SerializableFunction<NewInputT, ? extends Iterable<OutputT>> fn) {
  // The raw cast is retained from the original: it erases the "? extends Iterable<OutputT>"
  // wildcard so the wrapped function fits the constructor's parameter type.
  return new FlatMapElements<NewInputT, OutputT>(
      (Contextful) Contextful.fn(fn),
      fn,
      TypeDescriptors.inputOf(fn),
      outputType);
}
/**
 * For a {@code SerializableFunction<InputT, OutputT>} {@code fn} and output type descriptor,
 * returns a {@code PTransform} that takes an input {@code PCollection<InputT>} and returns a
 * {@code PCollection<OutputT>} containing {@code fn.apply(v)} for every element {@code v} in the
 * input.
 *
 * <p>Example of use in Java 8:
 *
 * <pre>{@code
 * PCollection<Integer> wordLengths = words.apply(
 *     MapElements.into(TypeDescriptors.integers())
 *                .via((String word) -> word.length()));
 * }</pre>
 *
 * <p>In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type
 * descriptor need not be provided.
 *
 * @param fn function applied to each input element; it is also passed unwrapped as the second
 *     constructor argument
 * @return a new {@code MapElements} carrying the wrapped function, the input type descriptor
 *     obtained from {@code TypeDescriptors.inputOf(fn)}, and this instance's {@code outputType}
 */
public <NewInputT> MapElements<NewInputT, OutputT> via(
    SerializableFunction<NewInputT, OutputT> fn) {
  return new MapElements<NewInputT, OutputT>(
      Contextful.fn(fn),
      fn,
      TypeDescriptors.inputOf(fn),
      outputType);
}
/**
 * Like {@link #via(Contextful)}, but uses the same {@link Sink} for all destinations.
 *
 * <p>The sink is turned into a function with {@code SerializableFunctions.clonesOf(sink)},
 * wrapped with {@code fn(...)}, and passed to the function-based {@code via} overload.
 *
 * @param sink the sink shared by every destination; must not be null
 * @return the result of the delegated {@code via} call
 * @throws IllegalArgumentException if {@code sink} is null
 */
public Write<DestinationT, UserT> via(Sink<UserT> sink) {
  // Explicit guard: rejects a missing sink with the same message as before.
  if (sink == null) {
    throw new IllegalArgumentException("sink can not be null");
  }
  return via(fn(SerializableFunctions.clonesOf(sink)));
}
/**
 * Like {@link #via(Contextful, Contextful)}, but uses the same sink for all destinations.
 *
 * <p>The sink is turned into a destination-independent function with {@code
 * SerializableFunctions.clonesOf(sink)} and then forwarded together with {@code outputFn}.
 *
 * @param outputFn function converting each user element into the type accepted by the sink;
 *     must not be null (checked with {@code checkArgument})
 * @param sink the sink shared by every destination; must not be null (checked with {@code
 *     checkArgument}, before {@code outputFn})
 * @return the result of the delegated two-function {@code via} call
 */
public <OutputT> Write<DestinationT, UserT> via(
    Contextful<Fn<UserT, OutputT>> outputFn, final Sink<OutputT> sink) {
  // Validation order is significant when both arguments are null: the sink is reported first.
  checkArgument(sink != null, "sink can not be null");
  checkArgument(outputFn != null, "outputFn can not be null");

  Contextful<Fn<DestinationT, Sink<OutputT>>> sharedSinkFn =
      fn(SerializableFunctions.clonesOf(sink));
  return via(outputFn, sharedSinkFn);
}
/**
 * Builds one view per entry of {@code signals} by applying {@code ToWaitView} to it, then maps
 * {@code input} through an identity function that declares all of those views as side inputs.
 *
 * <p>NOTE(review): presumably the side-input requirement is what delays processing of the main
 * input until the signals are ready — confirm against {@code ToWaitView} and the runner
 * semantics.
 *
 * @param input the collection to pass through unchanged
 * @return a collection with the same elements as {@code input}, typed from the input coder's
 *     encoded type descriptor
 */
@Override
public PCollection<T> expand(PCollection<T> input) {
  List<PCollectionView<?>> waitViews = Lists.newArrayList();

  int index = 0;
  while (index < signals.size()) {
    // Each application gets a unique, index-based transform name.
    String transformName = "To wait view " + index;
    waitViews.add(signals.get(index).apply(transformName, new ToWaitView()));
    index++;
  }

  return input.apply(
      "Wait",
      MapElements.into(input.getCoder().getEncodedTypeDescriptor())
          .via(fn((element, context) -> element, requiresSideInputs(waitViews))));
}
}
/**
 * For a {@code SimpleFunction<InputT, OutputT>} {@code fn}, returns a {@code PTransform} that
 * takes an input {@code PCollection<InputT>} and returns a {@code PCollection<OutputT>}
 * containing {@code fn.apply(v)} for every element {@code v} in the input.
 *
 * <p>This overload is intended primarily for use in Java 7. In Java 8, the overload {@link
 * #via(SerializableFunction)} supports use of lambda for greater concision.
 *
 * <p>Example of use in Java 7:
 *
 * <pre>{@code
 * PCollection<String> words = ...;
 * PCollection<Integer> wordLengths = words.apply(MapElements.via(
 *     new SimpleFunction<String, Integer>() {
 *       public Integer apply(String word) {
 *         return word.length();
 *       }
 *     }));
 * }</pre>
 *
 * @param fn function applied to each input element; its own input and output type descriptors
 *     are used for the returned transform, and it is also passed unwrapped as the second
 *     constructor argument
 * @return a new {@code MapElements} built from {@code fn}
 */
public static <InputT, OutputT> MapElements<InputT, OutputT> via(
    final SimpleFunction<InputT, OutputT> fn) {
  return new MapElements<InputT, OutputT>(
      Contextful.fn(fn),
      fn,
      fn.getInputTypeDescriptor(),
      fn.getOutputTypeDescriptor());
}
/**
 * Like {@link #via(Contextful, Contextful)}, but the output type of the sink is the same as the
 * type of the input collection. The sink function must create a new {@link Sink} instance every
 * time it is called.
 *
 * <p>The output function is set to the identity function, so elements reach the sink unchanged.
 *
 * @param sinkFn function producing the sink for a destination; must not be null (checked with
 *     {@code checkArgument})
 * @return a copy of this transform, built via {@code toBuilder()}, with the sink function and an
 *     identity output function set
 */
public Write<DestinationT, UserT> via(Contextful<Fn<DestinationT, Sink<UserT>>> sinkFn) {
  checkArgument(sinkFn != null, "sinkFn can not be null");

  Contextful<Fn<UserT, UserT>> identityOutputFn = fn(SerializableFunctions.<UserT>identity());

  // Setter order is kept as in the original; the raw cast on sinkFn is retained as well.
  return toBuilder()
      .setSinkFn((Contextful) sinkFn)
      .setOutputFn(identityOutputFn)
      .build();
}
Contextful<Fn<InputT, Iterable<OutputT>>> wrapped = (Contextful) Contextful.fn(fn); TypeDescriptor<OutputT> outputType = TypeDescriptors.extractFromTypeParameters(
/**
 * Writes rows to different file sinks based on the entity name of each row.
 *
 * <p>Files are named with {@code FileIO.Write.defaultNaming} using the prefix {@code
 * <folder>/<entityName>} and the configured {@code suffix}, where the folder is {@code
 * options.jobName} or {@code "unknown-jobs"} when no job name is set. Output goes to {@code
 * options.path}.
 *
 * <p>When the input is unbounded it is first placed into fixed windows of {@code
 * options.getWindowDuration()} (firing once the watermark passes the end of the window,
 * discarding fired panes, zero allowed lateness) and the write uses 10 shards.
 *
 * @param input the rows to write
 * @return a {@link PDone} in the pipeline of the write result
 */
@Override
public PDone expand(PCollection<FeatureRowExtended> input) {
  final String jobFolder;
  if (options.jobName != null) {
    jobFolder = options.jobName;
  } else {
    jobFolder = "unknown-jobs";
  }

  FileIO.Write<String, FeatureRowExtended> dynamicWrite =
      FileIO.<String, FeatureRowExtended>writeDynamic()
          .by((row) -> row.getRow().getEntityName())
          .withDestinationCoder(StringUtf8Coder.of())
          .withNaming(
              Contextful.fn(
                  (entity) -> FileIO.Write.defaultNaming(jobFolder + "/" + entity, suffix)))
          .via(Contextful.fn(toTextFunction), Contextful.fn((entity) -> TextIO.sink()))
          .to(options.path);

  PCollection<FeatureRowExtended> rows = input;
  if (input.isBounded().equals(IsBounded.UNBOUNDED)) {
    // Unbounded input is windowed before the file write and given an explicit shard count.
    Window<FeatureRowExtended> fixedWindow =
        Window.<FeatureRowExtended>into(FixedWindows.of(options.getWindowDuration()))
            .triggering(AfterWatermark.pastEndOfWindow())
            .discardingFiredPanes()
            .withAllowedLateness(Duration.ZERO);
    rows = input.apply(fixedWindow);
    dynamicWrite = dynamicWrite.withNumShards(10);
  }

  WriteFilesResult<String> writeResult = rows.apply(dynamicWrite);
  return PDone.in(writeResult.getPipeline());
}
}
"when using writeDynamic(), must specify " + ".withNaming() taking a function form DestinationT"); return fn( (element, c) -> { FileNaming naming = getFileNamingFn().getClosure().apply(element, c); constantFileNaming = relativeFileNaming(getOutputDirectory(), constantFileNaming); return fn(SerializableFunctions.<DestinationT, FileNaming>constant(constantFileNaming));
/**
 * Basic test of {@link MapElements} with a {@link Fn} and a side input: each of 0, 1, 2 is added
 * to a singleton side input holding 40.
 */
@Test
@Category(NeedsRunner.class)
public void testMapBasicWithSideInput() throws Exception {
  PCollection<Integer> base = pipeline.apply("Create base", Create.of(40));
  final PCollectionView<Integer> baseView = base.apply(View.asSingleton());

  PCollection<Integer> elements = pipeline.apply(Create.of(0, 1, 2));
  PCollection<Integer> shifted =
      elements.apply(
          MapElements.into(integers())
              .via(
                  fn(
                      (value, context) -> value + context.sideInput(baseView),
                      requiresSideInputs(baseView))));

  PAssert.that(shifted).containsInAnyOrder(40, 41, 42);
  pipeline.run();
}
/**
 * Basic test of {@link FlatMapElements} with a {@link Fn} and a side input: each of 0, 1, 2
 * yields the singleton side input value (40) minus and plus that element.
 */
@Test
@Category(NeedsRunner.class)
public void testFlatMapBasicWithSideInput() throws Exception {
  PCollection<Integer> base = pipeline.apply("Create base", Create.of(40));
  final PCollectionView<Integer> baseView = base.apply(View.asSingleton());

  PCollection<Integer> elements = pipeline.apply(Create.of(0, 1, 2));
  PCollection<Integer> spread =
      elements.apply(
          FlatMapElements.into(integers())
              .via(
                  fn(
                      (value, context) ->
                          ImmutableList.of(
                              context.sideInput(baseView) - value,
                              context.sideInput(baseView) + value),
                      requiresSideInputs(baseView))));

  PAssert.that(spread).containsInAnyOrder(38, 39, 40, 40, 41, 42);
  pipeline.run();
}
checkArgument( getDestinationCoder() == null, ".withDestinationCoder() requires writeDynamic()"); resolvedSpec.setDestinationFn(fn(SerializableFunctions.constant(null))); resolvedSpec.setDestinationCoder((Coder) VoidCoder.of());
Contextful.of( tableManifestNaming, Requirements.requiresSideInputs(outputDirectoryName))) .via(Contextful.fn(KV::getValue), TextIO.sink()) .withTempDirectory(outputDir));