/**
 * Exposes the shared benchmark fixture to Flink as a bounded batch source.
 *
 * @param env the batch execution environment the source is registered with
 * @return a DataSource of (timestamp, word) tuples built from the fixture
 */
static DataSource<Tuple2<Long, String>> addTestBatchSource(ExecutionEnvironment env) {
  // Explicit two-argument lambda in place of the Tuple2::of method reference;
  // each fixture element becomes one (timestamp, word) tuple.
  return env.fromCollection(
      Benchmarks.testInput((timestamp, word) -> Tuple2.of(timestamp, word)));
}
/**
 * Joins the per-term counts from the two windows and emits the term together
 * with its trend score when the score clears the configured threshold.
 *
 * NOTE(review): assumes {@code first} carries the long-window count and
 * {@code second} the short-window count, keyed by term in f0 — confirm
 * against the surrounding join setup.
 */
@Override
public void join(
    Tuple2<String, Integer> first,
    Tuple2<String, Integer> second,
    Collector<Tuple2<String, Double>> out) throws Exception {
  // Primitive double instead of boxed Double: the original boxed the score
  // only to unbox it again in the comparison below, so this removes the
  // needless autoboxing without changing behavior.
  double score = Benchmarks.trendsRank(longInterval, first.f1, shortInterval, second.f1, smooth);
  if (score > threshold) {
    out.collect(Tuple2.of(first.f0, score));
  }
}
}
// NOTE(review): fragment — starts mid-lambda, the enclosing map() call is not
// visible here. Maps each keyed pair of (short-window count, long-window
// count) to (key, trend rank), keeps only ranks above `threshold`, and writes
// the formatted result as text to an HDFS path derived for the Spark batch
// trends job. The Scala tuple accessors (_1()/_2()) suggest this runs over a
// cogrouped/joined pair RDD — confirm against the preceding pipeline stages.
int shortWindowCnt = t._2()._1().intValue(); int longWindowCnt = t._2()._2().intValue(); double rank = Benchmarks.trendsRank(longInterval.toMillis(), longWindowCnt, shortInterval.toMillis(), shortWindowCnt, smooth); return new Tuple2<>(t._1(), rank); }).filter(t -> t._2() > threshold); }); formatted.saveAsTextFile(Benchmarks.createOutputPath( params.getBatch().getSinkHdfsBaseUri().toString(), BatchTrendsSpark.class.getSimpleName()));
// NOTE(review): fragment — starts mid method-chain; the operator being typed
// is outside this view. Pins the preceding operator's result type to String
// (needed because type erasure hides it from Flink's type extraction), then
// writes the formatted output as text to an HDFS path derived for the Flink
// batch trends job.
.returns(String.class); outputs.writeAsText(Benchmarks.createOutputPath( batchParams.getSinkHdfsBaseUri().toString(), BatchTrendsFlink.class.getSimpleName()));
/**
 * Distributes the shared benchmark fixture across the cluster as a Spark RDD.
 *
 * @param sparkCtx the Spark context used to parallelize the fixture
 * @return an RDD of (timestamp, word) pairs built from the fixture
 */
static JavaRDD<Pair<Long, String>> getTestInput(JavaSparkContext sparkCtx) {
  // Explicit two-argument lambda in place of the Pair::of method reference;
  // one Pair per fixture element.
  return sparkCtx.parallelize(
      Benchmarks.testInput((timestamp, word) -> Pair.of(timestamp, word)));
}
/**
 * Pairs a windowed count record with its counterpart from the other window
 * and emits (timestamp, term, score) when the trend score clears the
 * configured threshold.
 *
 * NOTE(review): assumes value.f0 carries the long-window stats and value.f1
 * the short-window stats — confirm against the upstream join/cogroup.
 */
@Override
public void flatMap(
    Tuple2<Tuple3<Long, String, Integer>, Tuple3<Long, String, Integer>> value,
    Collector<Tuple3<Long, String, Double>> out) throws Exception {
  Tuple3<Long, String, Integer> first = value.f0;
  Tuple3<Long, String, Integer> second = value.f1;
  // Primitive double instead of boxed Double: the original boxed the score
  // only to unbox it in the comparison below — same behavior, no autoboxing.
  double score = Benchmarks.trendsRank(longInterval, first.f2, shortInterval, second.f2, smooth);
  if (score > threshold) {
    out.collect(Tuple3.of(first.f0, first.f1, score));
  }
}
}
/**
 * Feeds the shared benchmark fixture into the streaming job as a finite
 * in-memory source.
 *
 * @param env the streaming execution environment the source is registered with
 * @return a DataStream of (timestamp, word) tuples built from the fixture
 */
static DataStream<Tuple2<Long, String>> addTestStreamSource(StreamExecutionEnvironment env) {
  // Explicit two-argument lambda in place of the Tuple2::of method reference.
  return env.fromCollection(
      Benchmarks.testInput((timestamp, word) -> Tuple2.of(timestamp, word)));
}
// Beam DoFn element handler: joins the long- and short-window counts for one
// key out of a CoGroupByKey result and, when both windows saw the term,
// emits (timestamp, (term, rank)).
// value.getOnly(tag, 0L) substitutes 0 for a missing side, which the
// longCount > 0 && shortCount > 0 guard then filters out.
// NOTE(review): fragment — the trailing setCoder(...) call is cut off
// mid-expression (the anonymous TypeHint is left open at the end of this line).
@ProcessElement public void processElements(ProcessContext c) { Tuple2<Long, String> key = c.element().getKey(); CoGbkResult value = c.element().getValue(); int longCount = value.getOnly(longStatsTag, 0L).intValue(); int shortCount = value.getOnly(shortStatsTag, 0L).intValue(); if (longCount > 0 && shortCount > 0) { double rank = Benchmarks.trendsRank( longInterval.toMillis(), longCount, shortInterval.toMillis(), shortCount, smooth); c.output(KV.of(key.f0, Tuple2.of(key.f1, rank))); } } })).setCoder(KvCoder.of(VarLongCoder.of(), new FlinkCoder<>(new TypeHint<Tuple2<String, Double>>() {
/**
 * Registers the shared benchmark fixture as a bounded Euphoria input.
 *
 * @param flow the flow the input is attached to
 * @return a Dataset of (timestamp, word) pairs built from the fixture
 */
static Dataset<Pair<Long, String>> getTestInput(Flow flow) {
  // The original's intermediate `source` local is inlined: build the bounded
  // in-memory source and hand it straight to the flow.
  return flow.createInput(
      ListDataSource.bounded(Benchmarks.testInput(Pair::of)));
}
// NOTE(review): fragment — ends mid-call; the withCoder(...) argument and the
// rest of the pipeline are outside this view. Materializes the shared
// benchmark fixture locally and injects it into the Beam pipeline via
// Create.of, attaching an explicit coder (required because Beam cannot infer
// a coder for the Flink Tuple2 element type).
List<Tuple2<Long, String>> localInput = Benchmarks.testInput(Tuple2::of); return ppl.apply(Create.of(localInput) .withCoder(