/**
 * Turns a raw message into a parsed query by delegating to {@code parser}.
 *
 * @param message the raw bytes handed to the parser
 * @return whatever {@code parser.parse(message)} produces
 * @throws IOException wrapping any exception raised while parsing
 */
@Override
public SearchEventsParser.Query deserialize(byte[] message) throws IOException {
  final SearchEventsParser.Query parsed;
  try {
    parsed = parser.parse(message);
  } catch (Exception cause) {
    // Every parsing failure is surfaced as an IOException carrying the original cause.
    throw new IOException(cause);
  }
  return parsed;
}
/**
 * Builds an RDD of (timestamp, query) pairs from the text found at {@code inputPath}.
 *
 * <p>Each line is run through a {@link SearchEventsParser}; results that are {@code null} or
 * whose query is {@code null} or empty are dropped before the pairs are produced.
 *
 * @param sc context used to read the text input
 * @param inputPath location of the input, passed to {@code textFile} in string form
 * @return RDD pairing each retained query's timestamp with its query text
 * @throws IOException declared by the signature
 */
static JavaRDD<Pair<Long, String>> getHdfsSource(JavaSparkContext sc, URI inputPath)
    throws IOException {
  final JavaRDD<String> lines = sc.textFile(inputPath.toString());
  final SearchEventsParser parser = new SearchEventsParser();

  final JavaRDD<SearchEventsParser.Query> parsed = lines.map(parser::parse);
  // Keep only records that actually carry a non-empty query string.
  final JavaRDD<SearchEventsParser.Query> usable =
      parsed.filter(q -> q != null && q.query != null && !q.query.isEmpty());

  return usable.map(q -> Pair.of(q.timestamp, q.query));
}
/**
 * Builds a data set of (timestamp, query) tuples from the text found at {@code inputPath}.
 *
 * <p>Lines are read through a {@link TextInputFormat}, parsed with a {@link SearchEventsParser},
 * and results that are {@code null} or whose query is {@code null} or empty are dropped.
 *
 * @param env execution environment used to read the file
 * @param inputPath location of the input; used both for the input format and as the file path
 * @return data set pairing each retained query's timestamp with its query text
 * @throws IOException declared by the signature
 */
static DataSet<Tuple2<Long, String>> getHdfsSource(ExecutionEnvironment env, URI inputPath)
    throws IOException {
  final SearchEventsParser parser = new SearchEventsParser();
  final TextInputFormat format = new TextInputFormat(new Path(inputPath));
  final DataSet<String> lines = env.readFile(format, inputPath.toString());

  return lines
      .map(parser::parse)
      // Keep only records that actually carry a non-empty query string.
      .filter(q -> q != null && q.query != null && !q.query.isEmpty())
      .map(q -> Tuple2.of(q.timestamp, q.query))
      // Explicit type hint for the tuple produced by the lambda above.
      .returns(new TypeHint<Tuple2<Long, String>>() {});
}
/**
 * Parses a message supplied as raw bytes.
 *
 * <p>The bytes are decoded as UTF-8 and the resulting text is handed to the text-based
 * {@code parse} overload.
 *
 * @param message the encoded message
 * @return the result of parsing the decoded text
 * @throws Exception whatever the delegated {@code parse} call throws
 */
public Query parse(byte [] message) throws Exception {
  final String decoded = new String(message, StandardCharsets.UTF_8);
  return parse(decoded);
}
return FlatMap.of(input) .using(new UnaryFunctor<Pair<byte[], byte[]>, Pair<Long, String>>() { private final SearchEventsParser parser = new SearchEventsParser(); @Override public void apply(Pair<byte[], byte[]> pair, Collector<Pair<Long, String>> context) { .of(in) .using(new UnaryFunctor<String, Pair<Long, String>>() { SearchEventsParser parser = new SearchEventsParser(); @Override public void apply(String line, Collector<Pair<Long, String>> context) {
/**
 * Parses one text line and, when it yields a non-empty query, emits a
 * (timestamp, query) pair to {@code context}.
 *
 * @param line the text to parse
 * @param context collector receiving the resulting pair, if any
 * @throws RuntimeException wrapping any exception raised while parsing or collecting
 */
@Override
public void apply(String line, Collector<Pair<Long, String>> context) {
  try {
    final SearchEventsParser.Query parsed = parser.parse(line);
    // Nothing to emit when parsing produced no result or no query text.
    if (parsed == null || parsed.query == null || parsed.query.isEmpty()) {
      return;
    }
    context.collect(Pair.of(parsed.timestamp, parsed.query));
  } catch (Exception failure) {
    throw new RuntimeException(failure);
  }
}
})
return ppl.apply(Read.from(HDFSFileSource.fromText(inputUri))) .apply("MapSource", ParDo.of(new DoFn<String, Tuple2<Long, String>>() { SearchEventsParser parser = new SearchEventsParser(); @ProcessElement public void processElement(ProcessContext c) {
/**
 * Parses the second element of {@code pair} and, when it yields a non-empty query,
 * emits a (timestamp, query) pair to {@code context}.
 *
 * @param pair input pair; only its second element is read
 * @param context collector receiving the resulting pair, if any
 * @throws RuntimeException wrapping any exception raised while parsing or collecting
 */
@Override
public void apply(Pair<byte[], byte[]> pair, Collector<Pair<Long, String>> context) {
  try {
    final SearchEventsParser.Query parsed = parser.parse(pair.getSecond());
    // A record is worth emitting only if it carries a non-empty query string.
    final boolean hasQuery =
        parsed != null && parsed.query != null && !parsed.query.isEmpty();
    if (hasQuery) {
      context.collect(Pair.of(parsed.timestamp, parsed.query));
    }
  } catch (Exception failure) {
    throw new RuntimeException(failure);
  }
}
})
/**
 * Parses the current element and, when it yields a non-empty query, outputs a
 * (timestamp, query) tuple through {@code c}.
 *
 * @param c context supplying the element and receiving the output
 * @throws RuntimeException wrapping any exception raised while parsing or emitting
 */
@ProcessElement
public void processElement(ProcessContext c) {
  try {
    final SearchEventsParser.Query parsed = parser.parse(c.element());
    // Nothing to output when parsing produced no result or no query text.
    if (parsed == null || parsed.query == null || parsed.query.isEmpty()) {
      return;
    }
    c.output(Tuple2.of(parsed.timestamp, parsed.query));
  } catch (Exception failure) {
    throw new RuntimeException(failure);
  }
}
}))