/**
 * Deserializes a raw message into a parsed query.
 *
 * <p>Any failure (including runtime exceptions from the parser) is rethrown
 * as an {@link IOException} so the caller sees a single checked failure type.
 */
@Override
public SearchEventsParser.Query deserialize(byte[] message) throws IOException {
  final SearchEventsParser.Query parsed;
  try {
    parsed = parser.parse(message);
  } catch (Exception cause) {
    throw new IOException(cause);
  }
  return parsed;
}
/**
 * Builds a bounded Flink batch source backed by the in-memory benchmark
 * fixture, with each record shaped as (timestamp, query).
 */
static DataSource<Tuple2<Long, String>> addTestBatchSource(ExecutionEnvironment env) {
  return env.fromCollection(Benchmarks.testInput((time, query) -> Tuple2.of(time, query)));
}
/**
 * Parses one TAB-separated search-event line into a {@link Query}.
 *
 * <p>Expected layout: {@code <timestamp>\t<query>[\t...]}. The query text is
 * lower-cased (using the {@code CS} locale constant) and stripped of accents
 * for normalized matching.
 *
 * @param s raw input line; may be {@code null}
 * @return the parsed query, or {@code null} for blank lines or lines with
 *     fewer than two TAB-separated fields (lenient skip, not an error)
 * @throws IOException if the timestamp field is not a valid long
 */
public Query parse(String s) throws IOException {
  if (s == null || (s = s.trim()).isEmpty()) {
    return null;
  }
  String[] split = s.split("\t");
  if (split.length < 2) {
    return null;
  }
  long timestamp;
  try {
    timestamp = Long.parseLong(split[0]);
  } catch (NumberFormatException e) {
    // The method declares IOException and is otherwise lenient about bad
    // lines; don't leak an unchecked NumberFormatException to callers.
    throw new IOException("Malformed timestamp in line: " + s, e);
  }
  String query = StringUtils.stripAccents(split[1].toLowerCase(CS));
  return new Query(timestamp, query);
}
}
// NOTE(review): this span is a garbled concatenation of several truncated
// lambda bodies from the Spark trends-benchmark pipeline plus its sink call.
// The first `.map(i -> new Tuple2<>(` is cut off by the start of the next
// fragment — restore each piece from the original source before editing.

// Fragment 1: build the sliding (long) and tumbling (short) windowings and
// expand an element into its long windows — the map lambda is truncated here.
Windowing longWindowing = TimeSliding.of(longInterval, shortInterval);
Windowing shortWindowing = Time.of(shortInterval);
List<Long> windows = longWindowing.generate(p.getFirst());
return windows.stream() .map(i -> new Tuple2<>(
// Fragment 2: assign an element to its single short window and pair it with
// an initial count of 1.
List<Long> windows = shortWindowing.generate(p.getFirst());
Long i = Iterables.getOnlyElement(windows);
return new Tuple2<>(Pair.of(i, p.getSecond()), 1L);
// Fragment 3: combine short- and long-window counts into a trend rank and
// keep only entries whose rank exceeds the threshold.
int shortWindowCnt = t._2()._1().intValue();
int longWindowCnt = t._2()._2().intValue();
double rank = Benchmarks.trendsRank(longInterval.toMillis(), longWindowCnt,
    shortInterval.toMillis(), shortWindowCnt, smooth);
return new Tuple2<>(t._1(), rank);
}).filter(t -> t._2() > threshold); });
// Fragment 4: write the formatted results to a job-specific HDFS output path.
formatted.saveAsTextFile(Benchmarks.createOutputPath(
    params.getBatch().getSinkHdfsBaseUri().toString(),
    BatchTrendsSpark.class.getSimpleName()));
// NOTE(review): garbled span fusing two alternative Beam source builders —
// an in-memory test source (its `.withCoder(` call is truncated) and an HDFS
// text source whose DoFn body is cut off mid-method. Restore from the
// original source before editing.

// Fragment 1: bounded in-memory test input (Create.of), coder call truncated.
List<Tuple2<Long, String>> localInput = Benchmarks.testInput(Tuple2::of);
return ppl.apply(Create.of(localInput) .withCoder(
// Fragment 2: HDFS text source mapped through the search-event parser;
// the @ProcessElement body continues beyond this span.
return ppl.apply(Read.from(HDFSFileSource.fromText(inputUri)))
    .apply("MapSource", ParDo.of(new DoFn<String, Tuple2<Long, String>>() {
      SearchEventsParser parser = new SearchEventsParser();
      @ProcessElement public void processElement(ProcessContext c) {
/**
 * Reads TAB-separated search-event lines from HDFS and converts them into
 * (timestamp, query) pairs, dropping unparsable or empty-query records.
 *
 * @param sc        active Spark context
 * @param inputPath HDFS location of the input text
 * @throws IOException declared for parity with the parser's contract
 */
static JavaRDD<Pair<Long, String>> getHdfsSource(JavaSparkContext sc, URI inputPath) throws IOException {
  SearchEventsParser parser = new SearchEventsParser();
  JavaRDD<String> lines = sc.textFile(inputPath.toString());
  return lines
      .map(line -> parser.parse(line))
      .filter(parsed -> parsed != null && parsed.query != null && !parsed.query.isEmpty())
      .map(parsed -> Pair.of(parsed.timestamp, parsed.query));
}
/**
 * Joins the long-window and short-window counts for one query and emits
 * (query, score) when the trend score exceeds the threshold.
 */
@Override
public void join(Tuple2<String, Integer> first, Tuple2<String, Integer> second,
    Collector<Tuple2<String, Double>> out) throws Exception {
  Double score =
      Benchmarks.trendsRank(longInterval, first.f1, shortInterval, second.f1, smooth);
  boolean trending = score > threshold;
  if (trending) {
    out.collect(Tuple2.of(first.f0, score));
  }
}
}
// NOTE(review): trailing fragment of a Flink batch pipeline — the operator
// chain producing `outputs` sits above this span. It pins the output type and
// writes the formatted results to a job-specific HDFS path.
.returns(String.class);
outputs.writeAsText(Benchmarks.createOutputPath(
    batchParams.getSinkHdfsBaseUri().toString(),
    BatchTrendsFlink.class.getSimpleName()));
/** Decodes the raw message bytes as UTF-8 and delegates to {@code parse(String)}. */
public Query parse(byte[] message) throws Exception {
  String decoded = new String(message, StandardCharsets.UTF_8);
  return parse(decoded);
}
/**
 * Builds a Flink streaming source backed by the in-memory benchmark fixture,
 * with each record shaped as (timestamp, query).
 */
static DataStream<Tuple2<Long, String>> addTestStreamSource(StreamExecutionEnvironment env) {
  return env.fromCollection(Benchmarks.testInput((time, query) -> Tuple2.of(time, query)));
}
/**
 * Reads TAB-separated search-event lines from HDFS into a Flink DataSet of
 * (timestamp, query) tuples, dropping unparsable or empty-query records.
 *
 * @param env       Flink batch environment
 * @param inputPath HDFS location of the input text
 * @throws IOException declared for parity with the parser's contract
 */
static DataSet<Tuple2<Long, String>> getHdfsSource(ExecutionEnvironment env, URI inputPath) throws IOException {
  final SearchEventsParser parser = new SearchEventsParser();
  final TextInputFormat format = new TextInputFormat(new Path(inputPath));
  return env.readFile(format, inputPath.toString())
      .map(line -> parser.parse(line))
      .filter(parsed -> parsed != null && parsed.query != null && !parsed.query.isEmpty())
      .map(parsed -> Tuple2.of(parsed.timestamp, parsed.query))
      // Lambdas erase generics; pin the produced tuple type explicitly.
      .returns(new TypeHint<Tuple2<Long, String>>() {});
}
/**
 * Combines paired long-window and short-window per-query stats into a trend
 * score and emits (window, query, score) when the score exceeds the threshold.
 */
@Override
public void flatMap(
    Tuple2<Tuple3<Long, String, Integer>, Tuple3<Long, String, Integer>> value,
    Collector<Tuple3<Long, String, Double>> out) throws Exception {
  Tuple3<Long, String, Integer> longStats = value.f0;
  Tuple3<Long, String, Integer> shortStats = value.f1;
  Double score =
      Benchmarks.trendsRank(longInterval, longStats.f2, shortInterval, shortStats.f2, smooth);
  // Guard form negates the original `score > threshold` test verbatim so the
  // (theoretical) NaN case behaves identically: nothing is emitted.
  if (!(score > threshold)) {
    return;
  }
  out.collect(Tuple3.of(longStats.f0, longStats.f1, score));
}
}
/**
 * Parses one raw input line and emits a (timestamp, query) pair for valid,
 * non-empty queries; parse failures abort the task as unchecked exceptions.
 */
@Override
public void apply(String line, Collector<Pair<Long, String>> context) {
  try {
    SearchEventsParser.Query parsed = parser.parse(line);
    boolean usable = parsed != null && parsed.query != null && !parsed.query.isEmpty();
    if (usable) {
      context.collect(Pair.of(parsed.timestamp, parsed.query));
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
})
/** Materializes the in-memory benchmark fixture as a Spark RDD of (timestamp, query) pairs. */
static JavaRDD<Pair<Long, String>> getTestInput(JavaSparkContext sparkCtx) {
  return sparkCtx.parallelize(Benchmarks.testInput((time, query) -> Pair.of(time, query)));
}
// NOTE(review): garbled span fusing two truncated Euphoria FlatMap builders —
// one over raw (key, value) byte pairs (presumably a message-queue source) and
// one over plain text lines. Both `apply` bodies are cut off mid-method;
// restore from the original source before editing.

// Fragment 1: flat-map over byte-pair records, parsing the value bytes.
return FlatMap.of(input)
    .using(new UnaryFunctor<Pair<byte[], byte[]>, Pair<Long, String>>() {
      private final SearchEventsParser parser = new SearchEventsParser();
      @Override
      public void apply(Pair<byte[], byte[]> pair, Collector<Pair<Long, String>> context) {
// Fragment 2: flat-map over text lines through the same parser.
.of(in)
    .using(new UnaryFunctor<String, Pair<Long, String>>() {
      SearchEventsParser parser = new SearchEventsParser();
      @Override
      public void apply(String line, Collector<Pair<Long, String>> context) {
// NOTE(review): this span fuses a complete Beam @ProcessElement body with an
// unfinished coder expression (`new TypeHint<...>() {` is cut off at the end).
// The DoFn joins the co-grouped long/short window counts per (window, query)
// key, computes the trend rank, and emits windows where both counts are
// positive. Restore the trailing coder expression from the original source.
@ProcessElement
public void processElements(ProcessContext c) {
  Tuple2<Long, String> key = c.element().getKey();
  CoGbkResult value = c.element().getValue();
  // Missing side of the co-group defaults to a 0 count.
  int longCount = value.getOnly(longStatsTag, 0L).intValue();
  int shortCount = value.getOnly(shortStatsTag, 0L).intValue();
  if (longCount > 0 && shortCount > 0) {
    double rank = Benchmarks.trendsRank(
        longInterval.toMillis(), longCount, shortInterval.toMillis(), shortCount, smooth);
    c.output(KV.of(key.f0, Tuple2.of(key.f1, rank)));
  }
}
})).setCoder(KvCoder.of(VarLongCoder.of(),
    new FlinkCoder<>(new TypeHint<Tuple2<String, Double>>() {
/**
 * Parses the value bytes of a (key, value) record and emits a
 * (timestamp, query) pair for valid, non-empty queries; parse failures abort
 * the task as unchecked exceptions.
 */
@Override
public void apply(Pair<byte[], byte[]> pair, Collector<Pair<Long, String>> context) {
  try {
    SearchEventsParser.Query parsed = parser.parse(pair.getSecond());
    boolean usable = parsed != null && parsed.query != null && !parsed.query.isEmpty();
    if (usable) {
      context.collect(Pair.of(parsed.timestamp, parsed.query));
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
})
/** Wraps the in-memory benchmark fixture in a bounded Euphoria dataset. */
static Dataset<Pair<Long, String>> getTestInput(Flow flow) {
  return flow.createInput(ListDataSource.bounded(Benchmarks.testInput(Pair::of)));
}
/**
 * Parses one raw input element and outputs a (timestamp, query) tuple for
 * valid, non-empty queries; parse failures fail the bundle as unchecked
 * exceptions.
 */
@ProcessElement
public void processElement(ProcessContext c) {
  try {
    SearchEventsParser.Query parsed = parser.parse(c.element());
    boolean usable = parsed != null && parsed.query != null && !parsed.query.isEmpty();
    if (usable) {
      c.output(Tuple2.of(parsed.timestamp, parsed.query));
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
}))