public PlannerVertex addVertex(Transform transform, String name, int localParallelism,
                               ProcessorMetaSupplier metaSupplier) {
    PlannerVertex pv = new PlannerVertex(dag.newVertex(name, metaSupplier));
    pv.v.localParallelism(localParallelism);
    xform2vertex.put(transform, pv);
    return pv;
}
@Nonnull
private static DAG buildDag(Map<String, Long> counts) {
    final Pattern delimiter = Pattern.compile("\\W+");
    DAG dag = new DAG();
    Vertex source = dag.newVertex("source", DocLinesP::new);
    Vertex tokenize = dag.newVertex("tokenize",
            flatMapP((String line) -> traverseArray(delimiter.split(line.toLowerCase()))
                    .filter(word -> !word.isEmpty())));
    Vertex aggregate = dag.newVertex("aggregate",
            aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
    Vertex sink = dag.newVertex("sink", () -> new MapSinkP(counts));
    return dag.edge(between(source.localParallelism(1), tokenize))
              .edge(between(tokenize, aggregate).partitioned(wholeItem(), HASH_CODE))
              .edge(between(aggregate, sink));
}
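// A minimal sketch of driving buildDag() on a single local Jet member. The
// ConcurrentHashMap sink target and the top-10 printout are illustrative
// assumptions, not part of the original sample.
public static void main(String[] args) throws Exception {
    JetInstance jet = Jet.newJetInstance();
    try {
        Map<String, Long> counts = new ConcurrentHashMap<>();
        jet.newJob(buildDag(counts)).join();
        counts.entrySet().stream()
              .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
              .limit(10)
              .forEach(e -> System.out.println(e.getKey() + ": " + e.getValue()));
    } finally {
        Jet.shutdownAll();
    }
}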
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");
    Jet.newJetInstance();
    JetInstance jet = Jet.newJetInstance();
    try {
        IMapJet<Object, Object> map = jet.getMap("map");
        range(0, COUNT).parallel().forEach(i -> map.put("key-" + i, i));

        DAG dag = new DAG();
        Vertex source = dag.newVertex("map-source", SourceProcessors.readMapP(map.getName()));
        Vertex sink = dag.newVertex("file-sink", new WriteFilePSupplier(OUTPUT_FOLDER));
        dag.edge(between(source, sink));

        jet.newJob(dag).join();
        System.out.println("\nHazelcast IMap dumped to folder "
                + new File(OUTPUT_FOLDER).getAbsolutePath());
    } finally {
        Jet.shutdownAll();
    }
}
private void rewriteDagWithSnapshotRestore(DAG dag, long snapshotId, String mapName) {
    IMap<Object, Object> snapshotMap = nodeEngine.getHazelcastInstance().getMap(mapName);
    snapshotId = SnapshotValidator.validateSnapshot(snapshotId, jobIdString(), snapshotMap);
    logger.info("State of " + jobIdString() + " will be restored from snapshot "
            + snapshotId + ", map=" + mapName);

    List<Vertex> originalVertices = new ArrayList<>();
    dag.iterator().forEachRemaining(originalVertices::add);

    Map<String, Integer> vertexToOrdinal = new HashMap<>();
    Vertex readSnapshotVertex = dag.newVertex(SNAPSHOT_VERTEX_PREFIX + "read", readMapP(mapName));
    long finalSnapshotId = snapshotId;
    Vertex explodeVertex = dag.newVertex(SNAPSHOT_VERTEX_PREFIX + "explode",
            () -> new ExplodeSnapshotP(vertexToOrdinal, finalSnapshotId));
    dag.edge(between(readSnapshotVertex, explodeVertex).isolated());

    int index = 0;
    // add the edges
    for (Vertex userVertex : originalVertices) {
        vertexToOrdinal.put(userVertex.getName(), index);
        int destOrdinal = dag.getInboundEdges(userVertex.getName()).size();
        dag.edge(new SnapshotRestoreEdge(explodeVertex, index, userVertex, destOrdinal));
        index++;
    }
}
public static CompletableFuture<Void> copyMapUsingJob(JetInstance instance, int queueSize,
                                                      String sourceMap, String targetMap) {
    DAG dag = new DAG();
    Vertex source = dag.newVertex("readMap(" + sourceMap + ')', readMapP(sourceMap));
    Vertex sink = dag.newVertex("writeMap(" + targetMap + ')', writeMapP(targetMap));
    dag.edge(between(source, sink).setConfig(new EdgeConfig().setQueueSize(queueSize)));
    JobConfig jobConfig = new JobConfig()
            .setName("copy-" + sourceMap + "-to-" + targetMap);
    return instance.newJob(dag, jobConfig).getFuture();
}
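// Hypothetical usage sketch: copy "source" into "target" with a 16K queue and
// block until the batch job completes. The map names and queue size are
// illustrative assumptions.
copyMapUsingJob(jet, 16 * 1024, "source", "target").join();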
Vertex source = dag.newVertex("source", readMapP(DOCID_NAME));
Vertex docLines = dag.newVertex("doc-lines",
        // Supplier reconstructed (assumption): flat-map each (docId, docName)
        // entry into the lines of that document; docLines() is a helper that
        // streams the file's lines, and file I/O is marked non-cooperative.
        nonCooperativeP(flatMapP((Entry<Long, String> e) ->
                traverseStream(docLines("books/" + e.getValue())))));
Vertex tokenize = dag.newVertex("tokenize",
        flatMapP((String line) -> traverseArray(delimiter.split(line.toLowerCase()))
                .filter(word -> !word.isEmpty())));
Vertex accumulate = dag.newVertex("accumulate",
        accumulateByKeyP(singletonList(wholeItem()), counting()));
Vertex combine = dag.newVertex("combine", combineByKeyP(counting(), Util::entry));
Vertex sink = dag.newVertex("sink", writeMapP("counts"));
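// A sketch of how these vertices could be wired, assuming the usual two-stage
// word count: words are accumulated on local keys, then combined across the
// cluster on a distributed, partitioned edge.
dag.edge(between(source.localParallelism(1), docLines))
   .edge(between(docLines.localParallelism(1), tokenize))
   .edge(between(tokenize, accumulate).partitioned(wholeItem(), HASH_CODE))
   .edge(between(accumulate, combine).distributed().partitioned(entryKey()))
   .edge(between(combine, sink));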
SlidingWindowPolicy winPolicy = slidingWinPolicy(SLIDING_WINDOW_LENGTH_MILLIS, SLIDE_STEP_MILLIS);
Vertex tradeSource = dag.newVertex("trade-source",
        SourceProcessors.<Trade, Long, Trade>streamMapP(
                TRADES_MAP_NAME,
                DistributedPredicate.alwaysTrue(),
                EventJournalMapEvent::getNewValue,
                START_FROM_OLDEST,
                // Watermark parameters reconstructed (assumption): event time taken
                // from the trade, bounded lag, 30-second idle timeout.
                wmGenParams(Trade::getTime, limitingLag(MAX_LAG), emitByFrame(winPolicy), 30_000L)));
Vertex slidingStage1 = dag.newVertex("sliding-stage-1",
        Processors.accumulateByFrameP(
                singletonList(keyFn),
                singletonList(timestampFn),
                TimestampKind.EVENT,
                winPolicy,
                counting()));
Vertex slidingStage2 = dag.newVertex("sliding-stage-2",
        Processors.combineToSlidingWindowP(winPolicy, counting(), TimestampedEntry::fromWindowResult));
Vertex formatOutput = dag.newVertex("format-output", mapUsingContextP(
        ContextFactory.withCreateFn(x -> DateTimeFormatter.ofPattern("HH:mm:ss.SSS")),
        (DateTimeFormatter timeFormat, TimestampedEntry<String, Long> tse) ->
                // Formatting body reconstructed (assumption): window end time,
                // ticker and count on one line.
                String.format("%s %5s %4d",
                        timeFormat.format(Instant.ofEpochMilli(tse.getTimestamp())
                                                 .atZone(ZoneId.systemDefault())),
                        tse.getKey(), tse.getValue())));
Vertex sink = dag.newVertex("sink",
        SinkProcessors.writeFileP(OUTPUT_DIR_NAME, Object::toString, StandardCharsets.UTF_8, false));
SlidingWindowPolicy winPolicy = slidingWinPolicy(SLIDING_WINDOW_LENGTH_MILLIS, SLIDE_STEP_MILLIS);
Vertex streamTrades = dag.newVertex("stream-trades",
        SourceProcessors.<Trade, Long, Trade>streamMapP(
                TRADES_MAP_NAME,
                DistributedPredicate.alwaysTrue(),
                EventJournalMapEvent::getNewValue,
                JournalInitialPosition.START_FROM_OLDEST,
                // Watermark parameters reconstructed (assumption), as in the
                // two-stage variant: event time from the trade, 30s idle timeout.
                wmGenParams(Trade::getTime, limitingLag(MAX_LAG), emitByFrame(winPolicy), 30_000L)));
Vertex slidingWindow = dag.newVertex("aggregate-to-sliding-win",
        aggregateToSlidingWindowP(
                singletonList((DistributedFunction<Trade, String>) Trade::getTicker),
                // Timestamp function, timestamp kind and window policy reconstructed
                // (assumption) to complete the processor's parameter list.
                singletonList((DistributedToLongFunction<Trade>) Trade::getTime),
                TimestampKind.EVENT,
                winPolicy,
                counting(),
                TimestampedEntry::fromWindowResult));
Vertex formatOutput = dag.newVertex("format-output", formatOutput());
Vertex sink = dag.newVertex("sink",
        writeFileP(OUTPUT_DIR_NAME, Object::toString, StandardCharsets.UTF_8, false));
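// A sketch of the remaining wiring, assuming single-stage windowing: trades are
// partitioned by ticker across the cluster, window results are formatted and
// written to file.
dag.edge(between(streamTrades, slidingWindow)
           .distributed()
           .partitioned(Trade::getTicker, HASH_CODE))
   .edge(between(slidingWindow, formatOutput).isolated())
   .edge(between(formatOutput, sink));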
private void addSlidingWindowTwoStage(Planner p, SlidingWindowDef wDef) {
    String vertexName = p.uniqueVertexName(name());
    SlidingWindowPolicy winPolicy = wDef.toSlidingWindowPolicy();
    Vertex v1 = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX, accumulateByFrameP(
            keyFns,
            nCopies(keyFns.size(), (DistributedToLongFunction<JetEvent>) JetEvent::timestamp),
            TimestampKind.EVENT,
            winPolicy,
            aggrOp));
    v1.localParallelism(localParallelism());
    PlannerVertex pv2 = p.addVertex(this, vertexName, localParallelism(),
            combineToSlidingWindowP(winPolicy, aggrOp, mapToOutputFn));
    p.addEdges(this, v1, (e, ord) -> e.partitioned(keyFns.get(ord), HASH_CODE));
    p.dag.edge(between(v1, pv2.v).distributed().partitioned(entryKey()));
}
Vertex stopwordSource = dag.newVertex("stopword-source", StopwordsP::new);
Vertex docSource = dag.newVertex("doc-source", readMapP(DOCID_NAME));
Vertex docCount = dag.newVertex("doc-count", Processors.aggregateP(counting()));
Vertex docLines = dag.newVertex("doc-lines",
        // Supplier reconstructed (assumption): flat-map each (docId, docName)
        // entry into (docId, line) pairs, one per line of the document.
        nonCooperativeP(flatMapP((Entry<Long, String> e) ->
                traverseStream(docLines("books/" + e.getValue())
                        .map(line -> entry(e.getKey(), line))))));
Vertex tokenize = dag.newVertex("tokenize", TokenizeP::new);
Vertex tf = dag.newVertex("tf", aggregateByKeyP(singletonList(wholeItem()), counting(), Util::entry));
Vertex tfidf = dag.newVertex("tf-idf", TfIdfP::new);
Vertex sink = dag.newVertex("sink", SinkProcessors.writeMapP(INVERTED_INDEX));
public static void main(String[] args) {
    System.setProperty("hazelcast.logging.type", "log4j");
    JetInstance instance = Jet.newJetInstance();
    Jet.newJetInstance();
    try {
        TickerInfo.populateMap(instance.getHazelcastInstance().getReplicatedMap("tickersInfo"));

        DAG dag = new DAG();
        Vertex tradesSource = dag.newVertex("tradesSource", GenerateTradesP::new);
        Vertex enrichment = dag.newVertex("enrichment",
                mapUsingContextP(replicatedMapContext("tickersInfo"),
                        (ReplicatedMap<String, TickerInfo> map, Trade item) ->
                                tuple2(item, map.get(item.getTicker()))));
        Vertex sink = dag.newVertex("sink", DiagnosticProcessors.writeLoggerP());

        tradesSource.localParallelism(1);

        dag.edge(between(tradesSource, enrichment))
           .edge(between(enrichment, sink));

        instance.newJob(dag).join();
    } finally {
        Jet.shutdownAll();
    }
}
Vertex tradesSource = dag.newVertex("tradesSource", GenerateTradesP::new);
Vertex readTickerInfoMap = dag.newVertex("readTickerInfoMap", readMapP(TICKER_INFO_MAP_NAME));
Vertex collectToMap = dag.newVertex("collectToMap",
        Processors.aggregateP(AggregateOperations.toMap(entryKey(), entryValue())));
Vertex hashJoin = dag.newVertex("hashJoin", () -> new HashJoinP<>(Trade::getTicker));
Vertex sink = dag.newVertex("sink", writeLoggerP(o -> Arrays.toString((Object[]) o)));
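// A sketch of the wiring this hash join would need, under the assumption that
// the collected ticker-info map must fully arrive at the join before any trades
// do: the enriching edge is broadcast and given higher priority.
dag.edge(between(readTickerInfoMap, collectToMap).distributed().allToOne())
   .edge(from(collectToMap).to(hashJoin, 1).broadcast().priority(-1))
   .edge(from(tradesSource).to(hashJoin, 0))
   .edge(between(hashJoin, sink));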
DistributedFunction<Object, Object> projectFn =
        (DistributedFunction<Object, Object>) clause.rightProjectFn();
Vertex collector = p.dag.newVertex(collectorName + collectorOrdinal,
        () -> new HashJoinCollectP(getKeyFn, projectFn));
collector.localParallelism(1);
private void addToDagTwoStage(Planner p) {
    List<DistributedFunction<?, ? extends K>> groupKeyFns = this.groupKeyFns;
    String vertexName = p.uniqueVertexName(this.name());
    Vertex v1 = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX,
            accumulateByKeyP(groupKeyFns, aggrOp))
                     .localParallelism(localParallelism());
    PlannerVertex pv2 = p.addVertex(this, vertexName, localParallelism(),
            combineByKeyP(aggrOp, mapToOutputFn));
    p.addEdges(this, v1, (e, ord) -> e.partitioned(groupKeyFns.get(ord), HASH_CODE));
    p.dag.edge(between(v1, pv2.v).distributed().partitioned(entryKey()));
}
private void addSlidingWindowTwoStage(Planner p, SlidingWindowDef wDef) {
    String vertexName = p.uniqueVertexName(name());
    SlidingWindowPolicy winPolicy = wDef.toSlidingWindowPolicy();
    Vertex v1 = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX, accumulateByFrameP(
            nCopies(aggrOp.arity(), constantKey()),
            nCopies(aggrOp.arity(), (DistributedToLongFunction<JetEvent>) JetEvent::timestamp),
            TimestampKind.EVENT,
            winPolicy,
            aggrOp));
    v1.localParallelism(localParallelism());
    PlannerVertex pv2 = p.addVertex(this, vertexName, 1,
            combineToSlidingWindowP(winPolicy, aggrOp, mapToOutputFn.toKeyedWindowResultFn()));
    p.addEdges(this, v1);
    p.dag.edge(between(v1, pv2.v).distributed().allToOne());
}
@Override
@SuppressWarnings("unchecked")
public void addToDag(Planner p) {
    if (emitsWatermarks || eventTimePolicy == null) {
        // Reached when the source either emits both JetEvents and watermarks
        // or neither. In these cases we don't have to insert watermarks.
        p.addVertex(this, p.uniqueVertexName(name()), localParallelism(),
                metaSupplierFn.apply(eventTimePolicy != null ? eventTimePolicy : noEventTime()));
    } else {
        // ------------
        // |  sourceP |
        // ------------
        //      |
        //   isolated
        //      v
        // -------------
        // | insertWmP |
        // -------------
        String v1name = p.uniqueVertexName(name());
        Vertex v1 = p.dag.newVertex(v1name, metaSupplierFn.apply(eventTimePolicy))
                         .localParallelism(localParallelism());
        PlannerVertex pv2 = p.addVertex(
                this, v1name + "-add-timestamps", localParallelism(), insertWatermarksP(eventTimePolicy)
        );
        p.dag.edge(between(v1, pv2.v).isolated());
    }
}
private void addToDagTwoStage(Planner p) {
    String vertexName = p.uniqueVertexName(name());
    Vertex v1 = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX, accumulateP(aggrOp))
                     .localParallelism(localParallelism());
    PlannerVertex pv2 = p.addVertex(this, vertexName, 1, combineP(aggrOp));
    p.addEdges(this, v1);
    p.dag.edge(between(v1, pv2.v).distributed().allToOne());
}
@Override
public void addToDag(Planner p) {
    String vertexName = p.uniqueVertexName(this.name());
    Vertex v1 = p.dag.newVertex(vertexName + FIRST_STAGE_VERTEX_NAME_SUFFIX, distinctP(keyFn))
                     .localParallelism(localParallelism());
    PlannerVertex pv2 = p.addVertex(this, vertexName, localParallelism(), distinctP(keyFn));
    p.addEdges(this, v1, (e, ord) -> e.partitioned(keyFn, HASH_CODE));
    p.dag.edge(between(v1, pv2.v).distributed().partitioned(keyFn));
}