/**
 * Replaces the value of every vertex with the result of the given function.
 * Vertex IDs and the edge set are carried over unchanged.
 *
 * @param mapper function that computes the new value from a whole vertex.
 * @param returnType explicitly supplied type information of the mapped vertices.
 * @param <NV> the new vertex value type
 * @return a new graph holding the mapped vertices and the original edges
 */
public <NV> Graph<K, NV, EV> mapVertices(final MapFunction<Vertex<K, VV>, NV> mapper, TypeInformation<Vertex<K, NV>> returnType) {
    final MapFunction<Vertex<K, VV>, Vertex<K, NV>> valueReplacer =
        new MapFunction<Vertex<K, VV>, Vertex<K, NV>>() {
            // One vertex instance is filled in and handed out on every call.
            private Vertex<K, NV> reused = new Vertex<>();

            @Override
            public Vertex<K, NV> map(Vertex<K, VV> vertex) throws Exception {
                reused.f0 = vertex.f0;
                reused.f1 = mapper.map(vertex);
                return reused;
            }
        };

    DataSet<Vertex<K, NV>> mappedVertices = vertices
        .map(valueReplacer)
        .returns(returnType)
        .withForwardedFields("f0")
        .name("Map vertices");

    return new Graph<>(mappedVertices, this.edges, this.context);
}
/**
 * Replaces the value of every edge with the result of the given function.
 * Source ID, target ID and the vertex set are carried over unchanged.
 *
 * @param mapper function that computes the new value from a whole edge.
 * @param returnType explicitly supplied type information of the mapped edges.
 * @param <NV> the new edge value type
 * @return a new graph holding the original vertices and the mapped edges
 */
public <NV> Graph<K, VV, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper, TypeInformation<Edge<K, NV>> returnType) {
    final MapFunction<Edge<K, EV>, Edge<K, NV>> valueReplacer =
        new MapFunction<Edge<K, EV>, Edge<K, NV>>() {
            // One edge instance is filled in and handed out on every call.
            private Edge<K, NV> reused = new Edge<>();

            @Override
            public Edge<K, NV> map(Edge<K, EV> edge) throws Exception {
                reused.f0 = edge.f0;
                reused.f1 = edge.f1;
                reused.f2 = mapper.map(edge);
                return reused;
            }
        };

    DataSet<Edge<K, NV>> mappedEdges = edges
        .map(valueReplacer)
        .returns(returnType)
        .withForwardedFields("f0; f1")
        .name("Map edges");

    return new Graph<>(this.vertices, mappedEdges, this.context);
}
/**
 * Counts the elements of a DataSet by mapping each element to the value 1
 * and reducing the resulting values with {@code AddLongValue}.
 *
 * @param input DataSet whose elements are counted
 * @param <T> element type
 * @return the DataSet produced by the reduce step
 */
public static <T> DataSet<LongValue> count(DataSet<T> input) {
    // Stage 1: every element becomes a LongValue of 1.
    DataSet<LongValue> ones = input
        .map(new MapTo<>(new LongValue(1)))
        .returns(LONG_VALUE_TYPE_INFO)
        .name("Emit 1");

    // Stage 2: combine the ones.
    return ones
        .reduce(new AddLongValue())
        .name("Sum");
}
/**
 * A raw {@code RichMapFunction} declares no generic types; the result type
 * of the map operator must then be the one passed to {@code returns}.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
@Test
public void testFunctionWithMissingGenericsAndReturns() {
    // Deliberately raw: the function itself carries no type parameters.
    RichMapFunction rawMapper = new RichMapFunction() {
        private static final long serialVersionUID = 1L;

        @Override
        public Object map(Object value) throws Exception {
            return null;
        }
    };

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    TypeInformation<?> resultType = env
        .fromElements("arbitrary", "data")
        .map(rawMapper)
        .returns(Types.STRING)
        .getResultType();

    assertEquals(Types.STRING, resultType);
}
return output; }).returns(returnType).withForwardedFields("f0").name("Initialize vertex values");
/**
 * Builds {@code branches} branches on top of {@code input} and unions their results into
 * {@code stats}.
 *
 * <p>In each iteration, if {@code stats} is already set, {@code input} is first routed through
 * an identity map that receives a mapped version of {@code stats} as the broadcast set
 * {@code "stats"}. The branch then pairs every string with the constant key {@code 0},
 * groups by that key, takes the minimum by the string field and unwraps the string again.
 *
 * <p>NOTE(review): if {@code stats} is {@code null} and {@code branches} is not positive, the
 * final {@code stats.map(...)} dereferences {@code null}; callers presumably always pass a
 * positive branch count.
 *
 * @param input the data set the branches are built on
 * @param stats the statistics accumulated so far, or {@code null} if there are none yet
 * @param branches number of branches to add
 * @return the union of all branch results (and the initial {@code stats}), mapped once more
 */
private static DataSet<String> analyze(DataSet<String> input, DataSet<String> stats, int branches) {
    for (int i = 0; i < branches; i++) {
        // Lambdas can only capture effectively final locals.
        final int ii = i;

        if (stats != null) {
            input = input.map(
                new RichMapFunction<String, String>() {
                    @Override
                    public String map(String value) {
                        return value;
                    }
                }).withBroadcastSet(stats.map(s -> "(" + s + ").map"), "stats");
        }

        DataSet<String> branch = input
            // The produced tuple is (Integer key, String payload), so the declared
            // field types must be (INT, STRING) in that order.
            .map(s -> new Tuple2<>(0, s + ii)).returns(Types.TUPLE(Types.INT, Types.STRING))
            .groupBy(0)
            .minBy(1)
            .map(kv -> kv.f1).returns(Types.STRING);

        if (stats == null) {
            stats = branch;
        } else {
            stats = stats.union(branch);
        }
    }
    return stats.map(s -> "(" + s + ").stats");
}
}
.returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics
.returns(returnType) .setParallelism(parallelism) .name("Translate edge IDs");
.returns(returnType) .setParallelism(parallelism) .name("Translate edge values");
.returns(returnType) .setParallelism(parallelism) .name("Translate vertex IDs");
.returns(returnType) .setParallelism(parallelism) .name("Translate vertex values");
/**
 * Runs an identity map through the generic {@code Mapper} and supplies the
 * result type with a {@code TypeHint}; the collected output must equal the input.
 */
@Test
public void testIdentityMapWithMissingTypesAndStringTypeHint() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    DataSet<Tuple3<Integer, Long, String>> source = CollectionDataSets.getSmall3TupleDataSet(env);

    Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> identity =
        new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>();

    DataSet<Tuple3<Integer, Long, String>> mapped = source
        .map(identity)
        .returns(new TypeHint<Tuple3<Integer, Long, String>>(){});

    List<Tuple3<Integer, Long, String>> collected = mapped.collect();

    compareResultAsText(
        collected,
        "(2,2,Hello)\n" +
        "(3,2,Hello world)\n" +
        "(1,1,Hi)\n");
}
@Test public void testIdentityMapWithMissingTypesAndTypeInformationTypeHint() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env); DataSet<Tuple3<Integer, Long, String>> identityMapDs = ds // all following generics get erased during compilation .map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>()) .returns(new TupleTypeInfo<Tuple3<Integer, Long, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)); List<Tuple3<Integer, Long, String>> result = identityMapDs .collect(); String expectedResult = "(2,2,Hello)\n" + "(3,2,Hello world)\n" + "(1,1,Hi)\n"; compareResultAsText(result, expectedResult); }
new InitializeWorkSet<K, VV, Message>()).returns(workSetTypeInfo);
/**
 * Runs {@code SourceLabelAndEdgeLabelDistribution} on the graph and flattens each
 * counted pair into a triple.
 *
 * @param graph the logical graph the statistic is computed on.
 * @return triples made of the two fields of each counted pair followed by its count.
 */
@Override
public MapOperator<WithCount<Tuple2<String, String>>, Tuple3<String, String, Long>> execute(final LogicalGraph graph) {
    // Type information for the lambda result, supplied via returns(...).
    final TypeHint<Tuple3<String, String, Long>> tripleType =
        new TypeHint<Tuple3<String, String, Long>>() { };

    return new SourceLabelAndEdgeLabelDistribution()
        .execute(graph)
        .map(counted -> Tuple3.of(counted.f0.f0, counted.f0.f1, counted.f1))
        .returns(tripleType);
}
/**
 * Runs {@code DistinctEdgePropertiesByLabel} on the graph and flattens each
 * counted pair into a triple.
 *
 * @param graph the logical graph the statistic is computed on.
 * @return triples made of the two fields of each counted pair followed by its count.
 */
@Override
public MapOperator<WithCount<Tuple2<String, String>>, Tuple3<String, String, Long>> execute(LogicalGraph graph) {
    // Type information for the lambda result, supplied via returns(...).
    final TypeHint<Tuple3<String, String, Long>> flattenedType =
        new TypeHint<Tuple3<String, String, Long>>() { };

    return new DistinctEdgePropertiesByLabel()
        .execute(graph)
        .map(pairWithCount -> Tuple3.of(pairWithCount.f0.f0, pairWithCount.f0.f1, pairWithCount.f1))
        .returns(flattenedType);
}
/**
 * Reads text from the given path, parses every line with a {@code SearchEventsParser},
 * drops results that are {@code null} or have a {@code null}/empty query, and emits
 * (timestamp, query) pairs.
 *
 * @param env the execution environment used to create the source
 * @param inputPath location of the input
 * @return data set of (timestamp, query) tuples
 * @throws IOException declared by the original signature
 */
static DataSet<Tuple2<Long, String>> getHdfsSource(ExecutionEnvironment env, URI inputPath) throws IOException {
    SearchEventsParser parser = new SearchEventsParser();

    DataSet<String> rawLines =
        env.readFile(new TextInputFormat(new Path(inputPath)), inputPath.toString());

    return rawLines
        .map(parser::parse)
        .filter(event -> event != null && event.query != null && !event.query.isEmpty())
        .map(event -> Tuple2.of(event.timestamp, event.query))
        .returns(new TypeHint<Tuple2<Long, String>>() {});
}
/**
 * Reads the text file at {@code path}, splits every line into at most three tokens
 * on {@code CSVConstants.TOKEN_DELIMITER}, and wraps the first three tokens in a tuple.
 *
 * @param path location of the file to read
 * @param config configuration providing the execution environment
 * @return data set with one three-field tuple per line
 */
@Override
public DataSet<Tuple3<String, String, String>> readDistributed(String path, GradoopFlinkConfig config) {
    DataSet<String> lines = config.getExecutionEnvironment().readTextFile(path);

    DataSet<String[]> tokenized =
        lines.map(line -> StringEscaper.split(line, CSVConstants.TOKEN_DELIMITER, 3));

    return tokenized
        .map(tokens -> Tuple3.of(tokens[0], tokens[1], tokens[2]))
        .returns(new TypeHint<Tuple3<String, String, String>>() { });
}
/**
 * Loads a text file and converts each line into a triple of its first three
 * delimiter-separated tokens.
 *
 * @param path location of the file to read
 * @param config configuration providing the execution environment
 * @return data set with one three-field tuple per line
 */
@Override
public DataSet<Tuple3<String, String, String>> readDistributed(String path, GradoopFlinkConfig config) {
    // Type information for the lambda result, supplied via returns(...).
    final TypeHint<Tuple3<String, String, String>> tripleType =
        new TypeHint<Tuple3<String, String, String>>() { };

    return config.getExecutionEnvironment()
        .readTextFile(path)
        .map(row -> StringEscaper.split(row, CSVConstants.TOKEN_DELIMITER, 3))
        .map(parts -> Tuple3.of(parts[0], parts[1], parts[2]))
        .returns(tripleType);
}