/**
 * Builds a {@link GraphCollection} from a mixed data set of {@link Element}s.
 *
 * The element classes used to split the input are taken from the graph head,
 * vertex and edge factories of the given config.
 *
 * @param elements   EPGM elements
 * @param config     Gradoop Flink config
 * @param mayOverlap elements may be contained in multiple graphs
 * @return Graph collection
 */
@SuppressWarnings("unchecked")
public static GraphCollection extractGraphCollection(
  DataSet<Element> elements,
  GradoopFlinkConfig config,
  boolean mayOverlap) {

  // resolve the concrete element classes from the configured factories
  Class<GraphHead> headClass = config.getGraphHeadFactory().getType();
  Class<Vertex> vertexClass = config.getVertexFactory().getType();
  Class<Edge> edgeClass = config.getEdgeFactory().getType();

  return config.getGraphCollectionFactory().fromDataSets(
    extractGraphHeads(elements, headClass),
    extractVertices(elements, vertexClass, mayOverlap),
    extractEdges(elements, edgeClass, mayOverlap));
}
/**
 * Reads the text file at {@code jsonPath}, maps every line to a vertex and
 * returns a logical graph built from those vertices only.
 *
 * @return logical graph containing the imported vertices
 */
@Override
public LogicalGraph getLogicalGraph() {
  // one text line per vertex
  DataSet<String> jsonLines = config.getExecutionEnvironment().readTextFile(jsonPath);

  DataSet<Vertex> importedVertices =
    jsonLines.map(new MinimalJsonToVertex(config.getVertexFactory()));

  return config.getLogicalGraphFactory().fromDataSets(importedVertices);
}
/**
 * Creates a decoder that uses the element factories of the given config.
 *
 * @param config Gradoop configuration providing the graph head, vertex and
 *               edge factories
 */
public SubgraphDecoder(GradoopFlinkConfig config) {
  this.graphHeadFactory = config.getGraphHeadFactory();
  this.vertexFactory = config.getVertexFactory();
  this.edgeFactory = config.getEdgeFactory();
}
/**
 * Reads the vertex and edge CSV files and builds a logical graph from them.
 *
 * The meta data read from the meta data path is broadcast to both line
 * mappers under the name {@code BC_METADATA}.
 *
 * @return logical graph built from the CSV vertices and edges
 */
@Override
public LogicalGraph getLogicalGraph() {
  DataSet<Tuple3<String, String, String>> metaData =
    MetaData.fromFile(getMetaDataPath(), getConfig());

  // vertices: one CSV line per vertex
  DataSet<String> vertexLines =
    getConfig().getExecutionEnvironment().readTextFile(getVertexCSVPath());
  DataSet<Vertex> csvVertices = vertexLines
    .map(new CSVLineToVertex(getConfig().getVertexFactory()))
    .withBroadcastSet(metaData, BC_METADATA);

  // edges: one CSV line per edge
  DataSet<String> edgeLines =
    getConfig().getExecutionEnvironment().readTextFile(getEdgeCSVPath());
  DataSet<Edge> csvEdges = edgeLines
    .map(new CSVLineToEdge(getConfig().getEdgeFactory()))
    .withBroadcastSet(metaData, BC_METADATA);

  return getConfig().getLogicalGraphFactory().fromDataSets(csvVertices, csvEdges);
}
/**
 * Returns a graph collection created from the result of
 * {@link #getLogicalGraph()} via the graph collection factory of the config.
 *
 * @return graph collection built from the logical graph
 * @throws IOException declared by this method; presumably propagated from
 *                     {@link #getLogicalGraph()} (not visible here)
 */
@Override
public GraphCollection getGraphCollection() throws IOException {
  return config.getGraphCollectionFactory().fromGraph(getLogicalGraph());
}
}
/**
 * Builds the maximum match graph from the simulation result.
 *
 * Depending on {@code doAttachData()}, vertices and edges are either extracted
 * together with the data of the original graph elements or created through
 * the factories of the graph's config.
 *
 * @param graph    input graph
 * @param vertices valid vertices after simulation
 * @return maximum match graph, wrapped in a graph collection
 */
private GraphCollection postProcess(LogicalGraph graph, DataSet<FatVertex> vertices) {
  GradoopFlinkConfig config = graph.getConfig();

  DataSet<Vertex> resultVertices;
  if (doAttachData()) {
    resultVertices = PostProcessor.extractVerticesWithData(vertices, graph.getVertices());
  } else {
    resultVertices = PostProcessor.extractVertices(vertices, config.getVertexFactory());
  }

  DataSet<Edge> resultEdges;
  if (doAttachData()) {
    resultEdges = PostProcessor.extractEdgesWithData(vertices, graph.getEdges());
  } else {
    resultEdges = PostProcessor.extractEdges(vertices, config.getEdgeFactory());
  }

  return config.getGraphCollectionFactory().fromGraph(
    config.getLogicalGraphFactory().fromDataSets(resultVertices, resultEdges));
}
/**
 * Returns a collection of all logical graphs contained in the database.
 *
 * Vertices and edges whose graph count is zero are left out of the collection.
 *
 * @return collection of all logical graphs
 */
public GraphCollection getGraphCollection() {
  ExecutionEnvironment env = config.getExecutionEnvironment();

  // keep only elements that are contained in at least one graph
  DataSet<Vertex> containedVertices = env
    .fromCollection(getVertices())
    .filter(v -> v.getGraphCount() > 0);

  DataSet<Edge> containedEdges = env
    .fromCollection(getEdges())
    .filter(e -> e.getGraphCount() > 0);

  return config.getGraphCollectionFactory().fromDataSets(
    env.fromCollection(getGraphHeads()),
    containedVertices,
    containedEdges);
}
/**
 * Converts a Gradoop graph into a Gelly graph by applying the configured
 * vertex and edge mapping functions.
 *
 * @param graph Gradoop Graph.
 * @return Gelly Graph.
 */
public Graph<GradoopId, VV, EV> transformToGelly(LogicalGraph graph) {
  DataSet<Edge<GradoopId, EV>> mappedEdges = graph.getEdges().map(toGellyEdge);
  DataSet<Vertex<GradoopId, VV>> mappedVertices = graph.getVertices().map(toGellyVertex);

  return Graph.fromDataSet(
    mappedVertices,
    mappedEdges,
    graph.getConfig().getExecutionEnvironment());
}
/**
 * Creates the two graph transactions used by the tests.
 *
 * {@code tx0} holds graph A with vertices (A, B) and edge a,
 * {@code tx1} holds graph B with vertices (B, C) and edge b.
 * Vertex B is a member of both graphs.
 */
@BeforeClass
public static void setup() {
  GradoopFlinkConfig config =
    GradoopFlinkConfig.createConfig(ExecutionEnvironment.getExecutionEnvironment());

  GraphHead headA = config.getGraphHeadFactory().createGraphHead("A");
  GraphHead headB = config.getGraphHeadFactory().createGraphHead("B");

  Vertex vertexA = config.getVertexFactory().createVertex("A");
  Vertex vertexB = config.getVertexFactory().createVertex("B");
  Vertex vertexC = config.getVertexFactory().createVertex("C");

  Edge edgeAB = config.getEdgeFactory().createEdge("a", vertexA.getId(), vertexB.getId());
  Edge edgeBC = config.getEdgeFactory().createEdge("b", vertexB.getId(), vertexC.getId());

  // membership in graph A
  vertexA.addGraphId(headA.getId());
  vertexB.addGraphId(headA.getId());
  edgeAB.addGraphId(headA.getId());

  // membership in graph B
  vertexB.addGraphId(headB.getId());
  vertexC.addGraphId(headB.getId());
  edgeBC.addGraphId(headB.getId());

  tx0 = new GraphTransaction(headA, Sets.newHashSet(vertexA, vertexB), Sets.newHashSet(edgeAB));
  tx1 = new GraphTransaction(headB, Sets.newHashSet(vertexB, vertexC), Sets.newHashSet(edgeBC));
}
/**
 * Turns a collection of graph heads into a data set.
 *
 * Encapsulates the workaround for dataset creation from an empty collection:
 * in that case a data set holding one freshly created graph head is built
 * and then passed through a {@code False} filter.
 *
 * @param graphHeads graph heads
 * @return graph head dataset
 */
protected DataSet<GraphHead> createGraphHeadDataSet(Collection<GraphHead> graphHeads) {
  ExecutionEnvironment env = getConfig().getExecutionEnvironment();

  if (!graphHeads.isEmpty()) {
    return env.fromCollection(graphHeads);
  }

  // empty input: create a placeholder element and filter it out again
  return env
    .fromElements(getConfig().getGraphHeadFactory().createGraphHead())
    .filter(new False<>());
}
/**
 * Reads the csv file specified by {@link MinimalCSVImporter#path} and converts each valid line
 * to a {@link Vertex}.
 *
 * Each text line is first turned into properties and then into a vertex
 * created by the vertex factory of the config.
 *
 * @param propertyNames list of the property identifier names
 * @param checkReoccurringHeader set to true if each row of the file should be checked for
 *                               reoccurring of the column property names
 * @return a {@link DataSet} of all vertices from one specific file
 */
private DataSet<Vertex> readCSVFile(List<String> propertyNames, boolean checkReoccurringHeader) {
  DataSet<String> csvRows = config.getExecutionEnvironment().readTextFile(path);

  return csvRows
    .flatMap(new CsvRowToProperties(tokenSeparator, propertyNames, checkReoccurringHeader))
    .map(new PropertiesToVertex<>(config.getVertexFactory()))
    // explicit result type hint for the generic mapper
    .returns(config.getVertexFactory().getType());
}
/**
 * Returns the logical graph factory provided by the config.
 *
 * @return logical graph factory obtained from {@code config}
 */
@Override
public BaseGraphFactory<GraphHead, Vertex, Edge, LogicalGraph> getFactory() {
  return config.getLogicalGraphFactory();
}
/**
 * {@inheritDoc}
 *
 * The factory is passed from {@link GradoopFlinkConfig} at the moment.
 *
 * @return vertex factory obtained from {@code config}
 */
@Override
public EPGMVertexFactory<Vertex> getVertexFactory() {
  return config.getVertexFactory();
}
/**
 * Writes the social network example graph through an {@link AccumuloDataSink}
 * and validates that graph heads, vertices and edges read back from the store
 * match the elements provided by the loader.
 *
 * The GDL resource stream is closed once the loader has been initialized and
 * the store is closed even if writing or validation fails.
 *
 * @throws Exception if loading, writing, executing or validating fails
 */
@Test
public void write() throws Exception {
  AccumuloEPGMStore accumuloStore = new AccumuloEPGMStore(AccumuloTestSuite.getAcConfig(TEST_02));

  try {
    FlinkAsciiGraphLoader loader = new FlinkAsciiGraphLoader(
      GradoopFlinkConfig.createConfig(getExecutionEnvironment()));

    // try-with-resources: the resource stream was never closed before
    try (InputStream inputStream = getClass().getResourceAsStream(
      GradoopTestUtils.SOCIAL_NETWORK_GDL_FILE)) {
      loader.initDatabaseFromStream(inputStream);
    }

    GradoopFlinkConfig flinkConfig = GradoopFlinkConfig.createConfig(getExecutionEnvironment());

    new AccumuloDataSink(accumuloStore, flinkConfig)
      .write(flinkConfig.getGraphCollectionFactory()
        .fromCollections(
          loader.getGraphHeads(),
          loader.getVertices(),
          loader.getEdges()));

    getExecutionEnvironment().execute();
    accumuloStore.flush();

    validateEPGMElementCollections(loader.getGraphHeads(),
      accumuloStore.getGraphSpace().readRemainsAndClose());

    validateEPGMElementCollections(loader.getVertices(),
      accumuloStore.getVertexSpace().readRemainsAndClose());
    validateEPGMGraphElementCollections(loader.getVertices(),
      accumuloStore.getVertexSpace().readRemainsAndClose());

    validateEPGMElementCollections(loader.getEdges(),
      accumuloStore.getEdgeSpace().readRemainsAndClose());
    validateEPGMGraphElementCollections(loader.getEdges(),
      accumuloStore.getEdgeSpace().readRemainsAndClose());
  } finally {
    // release the store even when an assertion or the Flink job fails
    accumuloStore.close();
  }
}
/**
 * Returns example integrated instance graph from GDL input.
 *
 * The GDL resource {@code /data/gdl/itbda.gdl} is read as UTF-8, the
 * placeholders {@code SOURCEID_KEY}, {@code SUPERTYPE_KEY},
 * {@code SUPERCLASS_VALUE_MASTER} and {@code SUPERCLASS_VALUE_TRANSACTIONAL}
 * are substituted by the values of the equally named constants, and the graph
 * bound to the variable {@code iig} is returned.
 *
 * @return integrated instance graph
 * @throws IOException if the GDL resource is missing or cannot be read
 */
public static LogicalGraph getIntegratedInstanceGraph() throws IOException {

  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  GradoopFlinkConfig gradoopConf = GradoopFlinkConfig.createConfig(env);
  FlinkAsciiGraphLoader loader = new FlinkAsciiGraphLoader(gradoopConf);

  String gdl;
  // fully qualified names keep this block independent of the file's import list
  try (java.io.InputStream gdlStream =
    CategoryCharacteristicPatterns.class.getResourceAsStream("/data/gdl/itbda.gdl")) {

    if (gdlStream == null) {
      throw new IOException("GDL resource not found: /data/gdl/itbda.gdl");
    }
    // explicit charset instead of the platform default
    gdl = IOUtils.toString(gdlStream, java.nio.charset.StandardCharsets.UTF_8.name());
  }

  // literal substitution: replace() does not interpret '$' or '\' in the
  // replacement values the way replaceAll() does
  gdl = gdl
    .replace("SOURCEID_KEY", SOURCEID_KEY)
    .replace("SUPERTYPE_KEY", SUPERTYPE_KEY)
    .replace("SUPERCLASS_VALUE_MASTER", SUPERCLASS_VALUE_MASTER)
    .replace("SUPERCLASS_VALUE_TRANSACTIONAL", SUPERCLASS_VALUE_TRANSACTIONAL);

  loader.initDatabaseFromString(gdl);

  return loader.getLogicalGraphByVariable("iig");
}
/**
 * Returns a logical graph containing the complete vertex and edge space of
 * the database.
 *
 * Without graph containment, a new graph head labelled with the database
 * graph label is created and passed to the factory together with the
 * elements. With graph containment, the graph is built from the elements
 * only and its head label is renamed from the default graph label to the
 * database graph label.
 *
 * @param withGraphContainment true, if vertices and edges shall be updated to
 *                             be contained in the logical graph representing
 *                             the database
 * @return logical graph of vertex and edge space
 */
public LogicalGraph getLogicalGraph(boolean withGraphContainment) {
  if (!withGraphContainment) {
    GraphHead dbGraphHead = config.getGraphHeadFactory()
      .createGraphHead(GradoopConstants.DB_GRAPH_LABEL);

    return config.getLogicalGraphFactory()
      .fromCollections(dbGraphHead, getVertices(), getEdges());
  }

  return config.getLogicalGraphFactory()
    .fromCollections(getVertices(), getEdges())
    .transformGraphHead(
      new RenameLabel<>(GradoopConstants.DEFAULT_GRAPH_LABEL, GradoopConstants.DB_GRAPH_LABEL));
}
/** * Tries to add an unsupported property type {@link Queue} as property value. */ @Test(expected = UnsupportedTypeException.class) public void wrongPropertyTypeTest() throws AccumuloSecurityException, AccumuloException { GradoopAccumuloConfig config = AccumuloTestSuite.getAcConfig(TEST04); AccumuloEPGMStore graphStore = new AccumuloEPGMStore(config); // Queue is not supported by final Queue<String> value = Queues.newPriorityQueue(); GradoopId vertexID = GradoopId.get(); final String label = "A"; Properties props = Properties.create(); props.set("k1", value); final GradoopIdSet graphs = new GradoopIdSet(); GradoopFlinkConfig flinkConfig = GradoopFlinkConfig.createConfig(getExecutionEnvironment()); graphStore.writeVertex(flinkConfig .getVertexFactory() .initVertex(vertexID, label, props, graphs)); }
/**
 * {@inheritDoc}
 *
 * The factory is passed from {@link GradoopFlinkConfig} at the moment.
 *
 * @return edge factory obtained from {@code config}
 */
@Override
public EPGMEdgeFactory<Edge> getEdgeFactory() {
  return config.getEdgeFactory();
}
/**
 * {@inheritDoc}
 *
 * The factory is passed from {@link GradoopFlinkConfig} at the moment.
 *
 * @return graph head factory obtained from {@code config}
 */
@Override
public EPGMGraphHeadFactory<GraphHead> getGraphHeadFactory() {
  return config.getGraphHeadFactory();
}
/**
 * Creates an indexed GVE layout factory, registers it as logical graph layout
 * factory on a newly created Gradoop Flink config and returns it.
 *
 * @return the layout factory under test
 */
@Override
protected LogicalGraphLayoutFactory<GraphHead, Vertex, Edge> getFactory() {
  IndexedGVEGraphLayoutFactory layoutFactory = new IndexedGVEGraphLayoutFactory();

  // the config is only needed to register the factory on it
  GradoopFlinkConfig
    .createConfig(getExecutionEnvironment())
    .setLogicalGraphLayoutFactory(layoutFactory);

  return layoutFactory;
}
}