.map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
/**
 * Verifies that a replicated input source can feed one side of a cross.
 * Crossing the replicated 0..1000 sequence with a plain 0..1000 sequence,
 * keeping only equal pairs, and summing must yield 0+1+...+1000 = 500500.
 */
@Test
public void testReplicatedSourceToCross() throws Exception {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Replicated source: every parallel instance reads the full number sequence.
  DataSet<Tuple1<Long>> replicatedSource = env.createInput(
      new ReplicatingInputFormat<Long, GenericInputSplit>(
          new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))),
      BasicTypeInfo.LONG_TYPE_INFO)
      .map(new ToTuple());

  // Ordinary (non-replicated) source over the same range.
  DataSet<Tuple1<Long>> plainSource = env.generateSequence(0L, 1000L).map(new ToTuple());

  DataSet<Tuple1<Long>> summed = replicatedSource.cross(plainSource)
      // keep only pairs whose two elements carry the same value
      .filter(new FilterFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>>() {
        @Override
        public boolean filter(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
          return value.f0.f0.equals(value.f1.f0);
        }
      })
      // reduce each surviving pair to its first element
      .map(new MapFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>, Tuple1<Long>>() {
        @Override
        public Tuple1<Long> map(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
          return value.f0;
        }
      })
      .sum(0);

  List<Tuple1<Long>> result = summed.collect();

  String expectedResult = "(500500)";
  compareResultAsText(result, expectedResult);
}
/**
 * Filters edges based on the given GDL query. The resulting dataset only
 * contains edges that match at least one edge in the query graph.
 *
 * @param graph data graph
 * @param query query graph
 * @return dataset with matching edge triples and their candidates
 */
public static DataSet<TripleWithCandidates<GradoopId>> filterEdges(
  LogicalGraph graph, final String query) {
  // keep matching edges, then attach their query-edge candidates
  DataSet<TripleWithCandidates<GradoopId>> candidates = graph.getEdges()
    .filter(new MatchingEdges<>(query))
    .map(new BuildTripleWithCandidates<>(query));
  return candidates;
}
/**
 * Filters edges based on the given GDL query. The resulting dataset only
 * contains edges that match at least one edge in the query graph.
 *
 * @param graph data graph
 * @param query query graph
 * @return dataset with matching edge triples and their candidates
 */
public static DataSet<TripleWithCandidates<GradoopId>> filterEdges(
  LogicalGraph graph, final String query) {
  // select edges matching the query, then build candidate triples for them
  DataSet<TripleWithCandidates<GradoopId>> edgeCandidates = graph
    .getEdges()
    .filter(new MatchingEdges<>(query))
    .map(new BuildTripleWithCandidates<>(query));
  return edgeCandidates;
}
/**
 * Filters vertices based on the given GDL query. The resulting dataset only
 * contains vertex ids and their candidates that match at least one vertex in
 * the query graph.
 *
 * @param graph data graph
 * @param query query graph
 * @return dataset with matching vertex ids and their candidates
 */
public static DataSet<IdWithCandidates<GradoopId>> filterVertices(
  LogicalGraph graph, final String query) {
  // keep matching vertices, then attach their query-vertex candidates
  DataSet<IdWithCandidates<GradoopId>> candidates = graph.getVertices()
    .filter(new MatchingVertices<>(query))
    .map(new BuildIdWithCandidates<>(query));
  return candidates;
}
/**
 * Filters vertices based on the given GDL query. The resulting dataset only
 * contains vertex ids and their candidates that match at least one vertex in
 * the query graph.
 *
 * @param graph data graph
 * @param query query graph
 * @return dataset with matching vertex ids and their candidates
 */
public static DataSet<IdWithCandidates<GradoopId>> filterVertices(
  LogicalGraph graph, final String query) {
  // select vertices matching the query, then pair their ids with candidates
  DataSet<IdWithCandidates<GradoopId>> vertexCandidates = graph
    .getVertices()
    .filter(new MatchingVertices<>(query))
    .map(new BuildIdWithCandidates<>(query));
  return vertexCandidates;
}
/**
 * Builds a (timestamp, query) dataset from a text file on HDFS.
 * Lines are parsed into search events; entries whose parse result is null
 * or whose query text is null/empty are discarded.
 *
 * @param env       execution environment to create the source in
 * @param inputPath location of the input file
 * @return dataset of (event timestamp, query string) tuples
 * @throws IOException if the source cannot be set up
 */
static DataSet<Tuple2<Long, String>> getHdfsSource(ExecutionEnvironment env, URI inputPath)
    throws IOException {
  final SearchEventsParser parser = new SearchEventsParser();
  return env
      .readFile(new TextInputFormat(new Path(inputPath)), inputPath.toString())
      .map(parser::parse)
      // drop unparseable lines and events without a usable query string
      .filter(event -> event != null && event.query != null && !event.query.isEmpty())
      .map(event -> Tuple2.of(event.timestamp, event.query))
      // lambdas lose generic type info, so declare the output type explicitly
      .returns(new TypeHint<Tuple2<Long, String>>() {});
}
/**
 * Filters and casts EPGM graph heads from a given set of {@link Element}
 *
 * @param elements EPGM elements
 * @param graphHeadType graph head type
 * @return EPGM graph heads
 */
public static DataSet<GraphHead> extractGraphHeads(DataSet<Element> elements,
  Class<GraphHead> graphHeadType) {
  // keep only graph-head instances and cast them to the concrete type;
  // returns(...) restores the type info lost by the generic cast
  DataSet<GraphHead> graphHeads = elements
    .filter(new IsInstance<>(graphHeadType))
    .map(new Cast<>(graphHeadType))
    .returns(TypeExtractor.createTypeInfo(graphHeadType));
  return graphHeads;
}
/**
 * Filters and casts EPGM graph heads from a given set of {@link Element}
 *
 * @param elements EPGM elements
 * @param graphHeadType graph head type
 * @return EPGM graph heads
 */
public static DataSet<GraphHead> extractGraphHeads(DataSet<Element> elements,
  Class<GraphHead> graphHeadType) {
  // select graph-head elements, cast them, and re-declare the result type
  DataSet<GraphHead> result = elements
    .filter(new IsInstance<>(graphHeadType))
    .map(new Cast<>(graphHeadType))
    .returns(TypeExtractor.createTypeInfo(graphHeadType));
  return result;
}
/**
 * Loads the data section of the input file as a dataset of instances.
 * Blank lines, "%" comment lines and header directives ("@attribute",
 * "@relation", "@data" — presumably ARFF-style; confirm against the loader)
 * are skipped; remaining lines are parsed into {@code DataInstance}s with
 * the attribute definitions broadcast to the builder.
 *
 * @param env execution environment used to read the file
 * @return dataset of parsed data instances
 */
private DataSet<DataInstance> loadDataSet(ExecutionEnvironment env) {
  // lazily read the header so attribute metadata is available
  if (attributes == null) {
    this.loadHeader(env);
  }

  DataSet<Attributes> attsDataSet = env.fromElements(attributes);
  DataSource<String> data = env.readTextFile(pathFileData);

  Configuration config = new Configuration();
  config.setString(DataFlinkLoader.RELATION_NAME, this.relationName);

  // single predicate combining the original chain of filters (same result)
  return data
      .filter(line -> !line.isEmpty()
          && !line.startsWith("%")
          && !line.startsWith("@attribute")
          && !line.startsWith("@relation")
          && !line.startsWith("@data"))
      .map(new DataInstanceBuilder(isNormalize()))
      .withParameters(config)
      .withBroadcastSet(attsDataSet, DataFlinkLoader.ATTRIBUTES_NAME + "_" + this.relationName);
}
/**
 * Evaluates this operator: applies the vertex predicates and projects the
 * surviving vertices to embeddings restricted to the given property keys.
 *
 * @return dataset of embeddings built from the filtered, projected vertices
 */
@Override
public DataSet<Embedding> evaluate() {
  return input
    // keep only vertices satisfying the predicates
    .filter(new FilterVertex(predicates))
    .name(getName())
    // project each remaining vertex onto the requested property keys
    .map(new ProjectVertex(projectionPropertyKeys))
    .name(getName());
}
/**
 * Evaluates this operator: filters the input vertices with the configured
 * predicates and maps each match to an embedding containing only the
 * requested projection property keys.
 *
 * @return dataset of embeddings for the matching vertices
 */
@Override
public DataSet<Embedding> evaluate() {
  return input
    // discard vertices that do not satisfy the predicates
    .filter(new FilterVertex(predicates))
    .name(getName())
    // build the embedding with the projected property values
    .map(new ProjectVertex(projectionPropertyKeys))
    .name(getName());
}
/**
 * Builds the initial embeddings from the given vertices.
 *
 * @param vertices vertices and their query candidates
 * @return initial embeddings
 */
DataSet<EmbeddingWithTiePoint<K>> buildInitialEmbeddings(DataSet<IdWithCandidates<K>> vertices) {
  // the first traversal step determines which query vertex seeds the embeddings
  Step firstStep = getTraversalCode().getStep(0);
  return log(
    vertices
      .filter(new ElementHasCandidate<>((int) firstStep.getFrom()))
      .map(new BuildEmbeddingFromVertex<>(
        getKeyClazz(), firstStep, getVertexCount(), getEdgeCount())),
    new PrintEmbeddingWithTiePoint<>(),
    getVertexMapping(), getEdgeMapping());
}
/**
 * Builds the initial embeddings from the given vertices.
 *
 * @param vertices vertices and their query candidates
 * @return initial embeddings
 */
DataSet<EmbeddingWithTiePoint<K>> buildInitialEmbeddings(DataSet<IdWithCandidates<K>> vertices) {
  // seed embeddings from vertices that are candidates for the first step's source
  Step step0 = getTraversalCode().getStep(0);
  DataSet<EmbeddingWithTiePoint<K>> seeds = vertices
    .filter(new ElementHasCandidate<>((int) step0.getFrom()))
    .map(new BuildEmbeddingFromVertex<>(
      getKeyClazz(), step0, getVertexCount(), getEdgeCount()));
  // log() wraps the dataset with debug output of the embeddings
  return log(seeds, new PrintEmbeddingWithTiePoint<>(),
    getVertexMapping(), getEdgeMapping());
}
/**
 * Filters and casts EPGM vertices from a given set of {@link Element}
 *
 * @param elements EPGM elements
 * @param vertexType vertex type
 * @param mayOverlap vertices may be contained in multiple graphs
 * @return EPGM vertices
 */
public static DataSet<Vertex> extractVertices(DataSet<Element> elements,
  Class<Vertex> vertexType, boolean mayOverlap) {
  // select vertex instances, cast them, and re-declare the result type
  DataSet<Vertex> vertices = elements
    .filter(new IsInstance<>(vertexType))
    .map(new Cast<>(vertexType))
    .returns(TypeExtractor.createTypeInfo(vertexType));
  if (!mayOverlap) {
    return vertices;
  }
  // vertices may appear in multiple graphs: merge the graph ids of duplicates
  return vertices
    .groupBy(new Id<>())
    .combineGroup(new MergedGraphIds<>())
    .groupBy(new Id<>())
    .reduceGroup(new MergedGraphIds<>());
}
/**
 * Filters and casts EPGM edges from a given set of {@link Element}
 *
 * @param elements EPGM elements
 * @param edgeType edge type
 * @param mayOverlap edges may be contained in multiple graphs
 * @return EPGM edges
 */
public static DataSet<Edge> extractEdges(DataSet<Element> elements,
  Class<Edge> edgeType, boolean mayOverlap) {
  // select edge instances, cast them, and re-declare the result type
  DataSet<Edge> edges = elements
    .filter(new IsInstance<>(edgeType))
    .map(new Cast<>(edgeType))
    .returns(TypeExtractor.createTypeInfo(edgeType));
  if (!mayOverlap) {
    return edges;
  }
  // edges may appear in multiple graphs: merge the graph ids of duplicates
  return edges
    .groupBy(new Id<>())
    .combineGroup(new MergedGraphIds<>())
    .groupBy(new Id<>())
    .reduceGroup(new MergedGraphIds<>());
}
/**
 * Annotates every vertex with its distinct degree, in-degree and out-degree,
 * keeps only the vertices passing the degree filter, strips the temporary
 * degree properties again and rebuilds the graph with the filtered vertices.
 *
 * @param graph input graph
 * @return graph containing only vertices that pass the degree filter
 */
@Override
public LogicalGraph execute(LogicalGraph graph) {
  DistinctVertexDegrees distinctVertexDegrees = new DistinctVertexDegrees(
    SamplingAlgorithm.DEGREE_PROPERTY_KEY,
    SamplingAlgorithm.IN_DEGREE_PROPERTY_KEY,
    SamplingAlgorithm.OUT_DEGREE_PROPERTY_KEY,
    true);

  // filter by degree, then remove the helper properties added above
  DataSet<Vertex> filteredVertices = distinctVertexDegrees.execute(graph).getVertices()
    .filter(new VertexWithDegreeFilter<>(degree, SamplingAlgorithm.DEGREE_PROPERTY_KEY))
    .map(new PropertyRemover<>(SamplingAlgorithm.DEGREE_PROPERTY_KEY))
    .map(new PropertyRemover<>(SamplingAlgorithm.IN_DEGREE_PROPERTY_KEY))
    .map(new PropertyRemover<>(SamplingAlgorithm.OUT_DEGREE_PROPERTY_KEY));

  return graph.getConfig().getLogicalGraphFactory().fromDataSets(
    graph.getGraphHead(), filteredVertices, graph.getEdges());
}
}
/**
 * {@inheritDoc}
 *
 * <p>Annotates every vertex with its distinct degree, in-degree and
 * out-degree, keeps only vertices passing the degree filter, removes the
 * temporary degree properties and rebuilds the graph with the filtered
 * vertex set (edges and graph head are reused unchanged).
 */
@Override
public LogicalGraph execute(LogicalGraph graph) {
  DistinctVertexDegrees distinctVertexDegrees = new DistinctVertexDegrees(
    SamplingAlgorithm.DEGREE_PROPERTY_KEY,
    SamplingAlgorithm.IN_DEGREE_PROPERTY_KEY,
    SamplingAlgorithm.OUT_DEGREE_PROPERTY_KEY,
    true);
  // filter by degree, then strip the helper properties added above
  DataSet<Vertex> newVertices = distinctVertexDegrees.execute(graph).getVertices()
    .filter(new VertexWithDegreeFilter<>(degree, SamplingAlgorithm.DEGREE_PROPERTY_KEY))
    .map(new PropertyRemover<>(SamplingAlgorithm.DEGREE_PROPERTY_KEY))
    .map(new PropertyRemover<>(SamplingAlgorithm.IN_DEGREE_PROPERTY_KEY))
    .map(new PropertyRemover<>(SamplingAlgorithm.OUT_DEGREE_PROPERTY_KEY));
  return graph.getConfig().getLogicalGraphFactory().fromDataSets(
    graph.getGraphHead(), newVertices, graph.getEdges());
}
/**
 * Samples the graph by randomly marking vertices, keeping edges whose
 * endpoint marks satisfy the configured neighbor type, and finally removing
 * vertices that are left without any incident edge.
 *
 * @param graph input graph
 * @return sampled graph
 */
@Override
public LogicalGraph sample(LogicalGraph graph) {
  // mark each vertex as sampled or not, based on sample size and seed
  DataSet<Vertex> markedVertices = graph.getVertices()
    .map(new VertexRandomMarkedMap(sampleSize, randomSeed, PROPERTY_KEY_SAMPLED));

  // join edges with their marked source and target vertices, then keep
  // edges whose endpoint marks match the neighbor type
  DataSet<Edge> retainedEdges = graph.getEdges()
    .join(markedVertices)
    .where(new SourceId<>()).equalTo(new Id<>())
    .with(new EdgeSourceVertexJoin(PROPERTY_KEY_SAMPLED))
    .join(markedVertices)
    .where(1).equalTo(new Id<>())
    .with(new EdgeTargetVertexJoin(PROPERTY_KEY_SAMPLED))
    .filter(new EdgesWithSampledVerticesFilter(neighborType))
    .map(new Value0Of3<>());

  LogicalGraph reduced = graph.getFactory().fromDataSets(graph.getVertices(), retainedEdges);
  // finally drop vertices with degree zero (no retained incident edge)
  return new FilterVerticesWithDegreeOtherThanGiven(0L).execute(reduced);
}
}