/**
 * Returns {@code true} if any output value of {@code transform} is an unbounded
 * {@link PCollection}.
 */
private boolean hasUnboundedOutput(AppliedPTransform<?, ?, ?> transform) {
  for (Object output : transform.getOutputs().values()) {
    if (output instanceof PCollection
        && ((PCollection<?>) output).isBounded() == IsBounded.UNBOUNDED) {
      return true;
    }
  }
  return false;
}
/**
 * Inspects each produced value; if the pipeline is currently set to batch translation
 * and an unbounded {@link PCollection} is encountered, switches to streaming mode.
 */
@Override
public void visitValue(PValue value, Node producer) {
  if (translationMode.equals(TranslationMode.BATCH)) {
    // Use a wildcard cast instead of a raw type; the element type is irrelevant here.
    if (value instanceof PCollection
        && ((PCollection<?>) value).isBounded() == IsBounded.UNBOUNDED) {
      LOG.info(
          "Found unbounded PCollection {}. Switching to streaming execution.", value.getName());
      translationMode = TranslationMode.STREAMING;
    }
  }
}
}
// Applies the write to a bounded input; rejects unbounded PCollections up front.
@Override
public PDone expand(PCollection<T> input) {
  checkArgument(
      IsBounded.BOUNDED == input.isBounded(),
      "%s can only be applied to a Bounded PCollection",
      Write.class.getSimpleName());
  // NOTE(review): the sink is validated with null options — presumably validate()
  // tolerates a null PipelineOptions here; confirm against the Sink contract.
  PipelineOptions options = null;
  sink.validate(options);
  return createWrite(input, sink.createWriteOperation());
}
/**
 * Expands the write transform: verifies the input is bounded, validates the sink,
 * and delegates to {@code createWrite} with a freshly created write operation.
 */
@Override
public PDone expand(PCollection<T> input) {
  checkArgument(
      input.isBounded() == IsBounded.BOUNDED,
      "%s can only be applied to a Bounded PCollection",
      Write.class.getSimpleName());
  // Options are not available at expansion time; validate with null as before.
  final PipelineOptions options = null;
  sink.validate(options);
  return createWrite(input, sink.createWriteOperation());
}
/**
 * Verifies that an unbounded input uses a windowing strategy whose trigger is known
 * to produce output exactly once per window, which the join implementation requires.
 *
 * @throws UnsupportedOperationException if the collection is unbounded and its trigger
 *     may fire more than once per window
 */
private void verifySupportedTrigger(PCollection<Row> pCollection) {
  // Wildcard generic instead of a raw WindowingStrategy.
  WindowingStrategy<?, ?> windowingStrategy = pCollection.getWindowingStrategy();
  if (UNBOUNDED.equals(pCollection.isBounded()) && !triggersOncePerWindow(windowingStrategy)) {
    // Fixed missing space between "window," and "such" in the concatenated message.
    throw new UnsupportedOperationException(
        "Joining unbounded PCollections is currently only supported for "
            + "non-global windows with triggers that are known to produce output once per window, "
            + "such as the default trigger with zero allowed lateness. "
            + "In these cases Beam can guarantee it joins all input elements once per window. "
            + windowingStrategy
            + " is not supported");
  }
}
/**
 * Produces the primitive output collection, carrying over the input's pipeline,
 * windowing strategy, and boundedness with the precomputed output coder.
 */
@Override
public PCollection<KV<K, OutputT>> expand(PCollection<KV<K, Iterable<InputT>>> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      input.getWindowingStrategy(),
      input.isBounded(),
      outputCoder);
}
/**
 * Creates the primitive output for this transform. Windowing and boundedness are
 * inherited from the input; the coder and single output tag are supplied by fields.
 */
@Override
public PCollection<OutputT> expand(PCollection<? extends InputT> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      input.getWindowingStrategy(),
      input.isBounded(),
      outputCoder,
      onlyOutputTag);
}
/**
 * Re-windows the input: the primitive output keeps the input's coder and boundedness
 * but adopts the updated windowing strategy computed for this transform.
 */
@Override
public PCollection<T> expand(PCollection<T> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      updatedStrategy,
      input.isBounded(),
      input.getCoder());
}
/**
 * Builds the primitive grouped output, preserving the input's windowing strategy
 * and boundedness and applying the configured output coder.
 */
@Override
public PCollection<KV<K, OutputT>> expand(PCollection<KV<K, Iterable<AccumT>>> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      input.getWindowingStrategy(),
      input.isBounded(),
      outputCoder);
}
/**
 * Identity-shaped primitive expansion: output mirrors the input's pipeline, windowing,
 * boundedness, and coder.
 */
@Override
public PCollection<List<ElemT>> expand(PCollection<List<ElemT>> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      input.getWindowingStrategy(),
      input.isBounded(),
      input.getCoder());
}
/**
 * Dispatches source translation to the bounded or unbounded translator based on the
 * boundedness of the transform's output collection.
 */
@Override
void translateNode(
    PTransform<PBegin, PCollection<T>> transform, FlinkStreamingTranslationContext context) {
  // Compare enum constants with == (null-safe and idiomatic) rather than equals().
  if (context.getOutput(transform).isBounded() == PCollection.IsBounded.BOUNDED) {
    boundedTranslator.translateNode(transform, context);
  } else {
    unboundedTranslator.translateNode(transform, context);
  }
}
}
/**
 * Produces a primitive output identical in shape to the input: same pipeline,
 * windowing strategy, boundedness, and element coder.
 */
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      input.getWindowingStrategy(),
      input.isBounded(),
      input.getCoder());
}
/**
 * Expands to a primitive output that carries over every property of the input
 * (pipeline, windowing, boundedness, coder) unchanged.
 */
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), input.getWindowingStrategy(), input.isBounded(), input.getCoder());
}
}
/**
 * Creates the primitive output collection, inheriting the input's pipeline, windowing
 * strategy, boundedness, and coder.
 */
@Override
public PCollection<List<ElemT>> expand(PCollection<List<ElemT>> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), input.getWindowingStrategy(), input.isBounded(), input.getCoder());
}
/**
 * Produces the primitive grouped output keyed by K with windowed values, reusing the
 * KV output coder derived from the input's coder via {@link GroupByKey}.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public PCollection<KV<K, Iterable<WindowedValue<V>>>> expand(PCollection<KV<K, V>> input) {
  // The raw cast mirrors GroupByKey's coder derivation; suppressed above.
  Coder outputCoder = GroupByKey.getOutputKvCoder(input.getCoder());
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), input.getWindowingStrategy(), input.isBounded(), outputCoder);
}
}
/**
 * Expands keyed work items into grouped output: values for each key are collected into
 * an iterable, and the result adopts the configured output windowing strategy.
 */
@Override
public PCollection<KV<K, Iterable<V>>> expand(PCollection<KeyedWorkItem<K, V>> input) {
  KeyedWorkItemCoder<K, V> inputCoder = getKeyedWorkItemCoder(input.getCoder());
  KvCoder<K, Iterable<V>> groupedCoder =
      KvCoder.of(inputCoder.getKeyCoder(), IterableCoder.of(inputCoder.getElementCoder()));
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), outputWindowingStrategy, input.isBounded(), groupedCoder);
}
}
/**
 * Produces the grouped primitive output for keyed work items under the output windowing
 * strategy, with the grouped coder derived from the input's KeyedWorkItemCoder.
 */
@Override
public PCollection<KV<K, Iterable<V>>> expand(PCollection<KeyedWorkItem<K, V>> input) {
  // Pull the key and element coders out of the work-item coder to build the output coder.
  KeyedWorkItemCoder<K, V> workItemCoder = getKeyedWorkItemCoder(input.getCoder());
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      outputWindowingStrategy,
      input.isBounded(),
      KvCoder.of(workItemCoder.getKeyCoder(), IterableCoder.of(workItemCoder.getElementCoder())));
}
}
/**
 * Expands to a primitive output in the global default windowing strategy, keeping the
 * input's boundedness and coder.
 */
@Override
public PCollection<Integer> expand(PCollection<Integer> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      input.isBounded(),
      input.getCoder());
}
}
// Writes IndexedRecords to HDFS as Parquet, picking the bounded or unbounded write
// transform based on the input's boundedness.
@Override
public PDone write(PCollection<IndexedRecord> in) {
  ParquetHdfsFileSink sink = new ParquetHdfsFileSink(doAs, path, overwrite, mergeOutput);
  sink.getExtraHadoopConfiguration().addFrom(getExtraHadoopConfiguration());
  // Re-key every record as KV<Void, IndexedRecord>, the shape the file sink expects.
  PCollection<KV<Void, IndexedRecord>> pc1 = in.apply(ParDo.of(new FormatParquet()));
  pc1 = pc1.setCoder(KvCoder.of(VoidCoder.of(), LazyAvroCoder.of()));
  // Bounded inputs use the standard Write; unbounded inputs need UnboundedWrite.
  if (in.isBounded() == PCollection.IsBounded.BOUNDED) {
    return pc1.apply(Write.to(sink));
  } else {
    return pc1.apply(UnboundedWrite.of(sink));
  }
}
/**
 * Writes the incoming records to HDFS as Parquet. Records are first re-keyed to
 * {@code KV<Void, IndexedRecord>}, then routed to a bounded or unbounded write
 * depending on the boundedness of the input collection.
 */
@Override
public PDone write(PCollection<IndexedRecord> in) {
  ParquetHdfsFileSink sink = new ParquetHdfsFileSink(doAs, path, overwrite, mergeOutput);
  sink.getExtraHadoopConfiguration().addFrom(getExtraHadoopConfiguration());
  PCollection<KV<Void, IndexedRecord>> keyed =
      in.apply(ParDo.of(new FormatParquet()))
          .setCoder(KvCoder.of(VoidCoder.of(), LazyAvroCoder.of()));
  boolean bounded = in.isBounded() == PCollection.IsBounded.BOUNDED;
  return bounded ? keyed.apply(Write.to(sink)) : keyed.apply(UnboundedWrite.of(sink));
}