/**
 * NOTE: This method is for internal use only for defining a TableSource.
 * Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer =
        getKafkaConsumer(topic, properties, deserializationSchema);
    return env.addSource(kafkaConsumer).name(explainSource());
}
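// A minimal sketch (not from the source above) of the same naming pattern in a
// user program. The topic, bootstrap servers, and uid string are hypothetical;
// name() only sets the operator's display name in the web UI, while uid() pins
// the operator ID so its state can be matched up when restoring from a savepoint.
Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");

DataStream<String> stream = env
    .addSource(new FlinkKafkaConsumer<>("my-topic", new SimpleStringSchema(), props))
    .name("KafkaSource")
    .uid("kafka-source");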
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
        .name("EventSource")
        .uid("EventSource")
        .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
        .keyBy(Event::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
        Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<Event, Integer> afterStatefulOperations = isOriginalJobVariant(pt)
        ? applyOriginalStatefulOperations(source, stateSer, Collections.emptyList())
        : applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
        .flatMap(createSemanticsCheckMapper(pt))
        .name("SemanticsCheckMapper")
        .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
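// Hedged sketch of what createTimestampExtractor(pt) above might return, using
// the classic pre-1.11 periodic-watermark API. The Event#getEventTime accessor
// and the 100 ms out-of-orderness bound are assumptions, not taken from the source.
private static AssignerWithPeriodicWatermarks<Event> createTimestampExtractor(ParameterTool pt) {
    return new BoundedOutOfOrdernessTimestampExtractor<Event>(Time.milliseconds(100)) {
        @Override
        public long extractTimestamp(Event event) {
            return event.getEventTime();
        }
    };
}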
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    final MonotonicTTLTimeProvider ttlTimeProvider = setBackendWithCustomTTLTimeProvider(env);

    TtlTestConfig config = TtlTestConfig.fromArgs(pt);
    StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(config.ttl)
        .cleanupIncrementally(5, true)
        .cleanupFullSnapshot()
        .build();

    env
        .addSource(new TtlStateUpdateSource(config.keySpace, config.sleepAfterElements, config.sleepTime))
        .name("TtlStateUpdateSource")
        .keyBy(TtlStateUpdate::getKey)
        .flatMap(new TtlVerifyUpdateFunction(ttlConfig, ttlTimeProvider, config.reportStatAfterUpdatesNum))
        .name("TtlVerifyUpdateFunction")
        .addSink(new PrintSinkFunction<>())
        .name("PrintFailedVerifications");

    env.execute("State TTL test job");
}
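// Hedged sketch of how a user function would apply a StateTtlConfig like the one
// built above to its own keyed state; the descriptor name and the 10-minute TTL
// are illustrative assumptions. cleanupIncrementally(5, true) purges up to 5
// expired entries per state access (and for every processed record), while
// cleanupFullSnapshot() drops expired entries when a full snapshot is taken.
StateTtlConfig ttl = StateTtlConfig.newBuilder(Time.minutes(10))
    .cleanupIncrementally(5, true)
    .cleanupFullSnapshot()
    .build();

ValueStateDescriptor<String> desc = new ValueStateDescriptor<>("last-value", String.class);
desc.enableTimeToLive(ttl);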
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// The map bodies were truncated in the original snippet; identity maps are
// filled in here so the code compiles.
DataStream<Long> dataStream1 = env.generateSequence(0, 0).name("testSource1")
    .map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    });

DataStream<Long> dataStream2 = env.generateSequence(0, 0).name("testSource2")
    .map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    });
// Fragment from a test: the tail of an anonymous SourceFunction whose run()
// method and enclosing env.addSource(...) call are elided in the original.
    @Override
    public void cancel() {
    }
}).name("test_source");

opMethod.invoke(source, resource1);
.name("source").uid("source");
@Override
public DataStream<BaseRow> getBoundedStream(StreamExecutionEnvironment streamEnv) {
    try {
        List<Partition> partitionList;
        if (null == prunedPartitions || prunedPartitions.isEmpty()) {
            partitionList = allPartitions;
        } else {
            partitionList = prunedPartitions;
        }
        return streamEnv.createInput(
            new HiveTableInputFormat.Builder(rowTypeInfo, jobConf, dbName, tableName,
                isPartitionTable, partitionColNames, partitionList).build())
            .name(explainSource());
    } catch (Exception e) {
        logger.error("Failed to create HiveTableInputFormat.", e);
        throw new RuntimeException(e);
    }
}
    .addSource(new FlinkKafkaConsumer08<String>(
        appArgs.getProperty(DiPConfiguration.KAFKA_TOPIC),
        new SimpleStringSchema(),
        properties))
    .name("KafkaSource");

DataStream<Object[]> tweeterStream = kafkaSourceStream
    .map(record -> FlatJsonConverter.convertToValuesArray(record))
    .name("Map Data");
    .getExecutionEnvironment()
    .addSource(sourceWrapper)
    .name(fullName)
    .uid(fullName)
    .returns(outputTypeInfo);
@Override
public void translateNode(Read.Bounded<T> transform, FlinkStreamingTranslationContext context) {
    PCollection<T> output = context.getOutput(transform);
    TypeInformation<WindowedValue<T>> outputTypeInfo =
        context.getTypeInfo(context.getOutput(transform));

    DataStream<WindowedValue<T>> source;
    try {
        BoundedSourceWrapper<T> sourceWrapper =
            new BoundedSourceWrapper<>(
                context.getCurrentTransform().getFullName(),
                context.getPipelineOptions(),
                transform.getSource(),
                context.getExecutionEnvironment().getParallelism());
        source = context
            .getExecutionEnvironment()
            .addSource(sourceWrapper)
            .name(transform.getName())
            .returns(outputTypeInfo);
    } catch (Exception e) {
        throw new RuntimeException(
            "Error while translating BoundedSource: " + transform.getSource(), e);
    }
    context.setOutputDataStream(output, source);
}
    .getExecutionEnvironment()
    .addSource(sourceWrapper)
    .name(fullName)
    .uid(fullName)
    .returns(withIdTypeInfo);
nonDedupSource = context
    .getExecutionEnvironment()
    .addSource(sourceWrapper)
    .name(transform.getName())
    .returns(withIdTypeInfo);