/**
 * Convenience for {@link #map(IMap, Predicate, Projection)} which uses a
 * {@link DistributedFunction} as the projection function.
 * <p>
 * <strong>NOTE:</strong> Jet only remembers the name of the map you supply
 * and acquires a map with that name on the local cluster. If you supply a
 * map instance from another cluster, no error will be thrown to indicate
 * this.
 */
@Nonnull
public static <T, K, V> BatchSource<T> map(
        @Nonnull IMap<? extends K, ? extends V> map,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull DistributedFunction<? super Map.Entry<K, V>, ? extends T> projectionFn
) {
    return map(map.getName(), predicate, projectionFn);
}
/**
 * Returns a source that fetches entries from the given Hazelcast {@code
 * IMap} and emits them as {@code Map.Entry}. It leverages data locality
 * by making each of the underlying processors fetch only those entries
 * that are stored on the member where it is running.
 * <p>
 * <strong>NOTE:</strong> Jet only remembers the name of the map you supply
 * and acquires a map with that name on the local cluster. If you supply a
 * map instance from another cluster, no error will be thrown to indicate
 * this.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * If the {@code IMap} is modified while being read, or if there is a
 * cluster topology change (triggering data migration), the source may
 * miss and/or duplicate some entries.
 * <p>
 * The default local parallelism for this processor is 2 (or 1 if just 1
 * CPU is available).
 */
@Nonnull
public static <K, V> BatchSource<Entry<K, V>> map(@Nonnull IMap<? extends K, ? extends V> map) {
    return map(map.getName());
}
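// For reference, a minimal usage sketch of this source. The instance setup
// and the map name "people" are illustrative, not part of the API.
JetInstance jet = Jet.newJetInstance();
IMap<String, Integer> people = jet.getMap("people");
people.put("alice", 30);

Pipeline p = Pipeline.create();
p.drawFrom(Sources.map(people))           // emits Map.Entry<String, Integer>
 .drainTo(Sinks.list("peopleAsEntries")); // collect the entries into an IList
jet.newJob(p).join();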
@Nonnull
public static <T, K, V> BatchSource<T> map(
        @Nonnull IMap<? extends K, ? extends V> map,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull Projection<? super Entry<K, V>, ? extends T> projection
) {
    return map(map.getName(), predicate, projection);
}
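// The Projection overload lets the member apply a Hazelcast projection to
// each entry before handing it to Jet, so only the projected value travels.
// A minimal sketch, assuming an IMap<String, Person> named "people" whose
// Person values expose a "name" attribute (Person and the map name are
// hypothetical; jet is a running JetInstance as above):
IMap<String, Person> peopleMap = jet.getMap("people");
Projection<Entry<String, Person>, String> nameOnly =
        Projections.singleAttribute("name");
BatchSource<String> names = Sources.map(peopleMap, t -> true, nameOnly);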
/**
 * This will take the contents of the source map, convert each value to a
 * string, and update the sink map by suffixing the existing value with
 * {@code -odd} if the key is odd and with {@code -even} if the key is even.
 */
private static Pipeline mapWithUpdating(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .map(e -> entry(e.getKey(), String.valueOf(e.getValue())))
            .drainTo(
                    Sinks.mapWithUpdating(
                            sinkMapName,
                            (oldValue, item) -> item.getKey() % 2 == 0
                                    ? oldValue + "-even"
                                    : oldValue + "-odd"
                    )
            );
    return pipeline;
}
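// A hypothetical driver for the pipeline above. The map names are
// illustrative; the sink map is pre-populated here so that the update
// function's oldValue is non-null.
JetInstance jet = Jet.newJetInstance();
try {
    IMap<Integer, Integer> source = jet.getMap("sourceMap");
    IMap<Integer, String> sink = jet.getMap("sinkMap");
    for (int i = 0; i < 10; i++) {
        source.put(i, i);
        sink.put(i, String.valueOf(i));
    }
    jet.newJob(mapWithUpdating("sourceMap", "sinkMap")).join();
    System.out.println(sink.get(2)); // -> "2-even"
    System.out.println(sink.get(3)); // -> "3-odd"
} finally {
    Jet.shutdownAll();
}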
public static Pipeline buildPipeline(String sourceName, String sinkName) {
    Pattern pattern = Pattern.compile("\\W+");
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, String>map(sourceName))
            .flatMap(e -> Traversers.traverseArray(pattern.split(e.getValue().toLowerCase()))
                                    .filter(w -> !w.isEmpty()))
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.map(sinkName));
    return pipeline;
}
private static Pipeline buildPipeline() {
    Pattern delimiter = Pattern.compile("\\W+");
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Long, String>map(BOOK_LINES))
     .flatMap(e -> traverseArray(delimiter.split(e.getValue().toLowerCase())))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(COUNTS));
    return p;
}
/**
 * This will take the contents of the source map and write them into the sink map.
 */
private static Pipeline mapSourceAndSink(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.map(sourceMapName))
            .drainTo(Sinks.map(sinkMapName));
    return pipeline;
}
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Integer, User>map(MAP_NAME))
     .map(Map.Entry::getValue)
     .drainTo(Sinks.jdbc("INSERT INTO " + TABLE_NAME + "(id, name) VALUES(?, ?)",
             connectionUrl,
             (stmt, user) -> {
                 // Bind the values from the stream item to a PreparedStatement created from
                 // the above query.
                 stmt.setInt(1, user.getId());
                 stmt.setString(2, user.getName());
             }));
    return p;
}
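// The JDBC sink assumes the target table already exists. A one-time setup
// step along these lines (the exact DDL is illustrative) would typically
// precede the job:
try (Connection connection = DriverManager.getConnection(connectionUrl);
     Statement statement = connection.createStatement()) {
    statement.execute("CREATE TABLE " + TABLE_NAME + " (id INT PRIMARY KEY, name VARCHAR(255))");
}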
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    NettyServer nettyServer = new NettyServer(PORT, DistributedConsumer.noop(),
            msg -> COUNTER.incrementAndGet());
    nettyServer.start();

    JetInstance jet = Jet.newJetInstance();
    Jet.newJetInstance();

    try {
        System.out.println("Populating map...");
        IMapJet<Integer, Integer> map = jet.getMap(SOURCE_NAME);
        IntStream.range(0, SOURCE_ITEM_COUNT).parallel().forEach(i -> map.put(i, i));

        Pipeline p = Pipeline.create();
        p.drawFrom(Sources.map(SOURCE_NAME))
         .drainTo(Sinks.socket(HOST, PORT, e -> e.getValue().toString(), UTF_8));

        System.out.println("Executing job...");
        jet.newJob(p).join();
    } finally {
        nettyServer.stop();
        Jet.shutdownAll();
    }

    System.out.println("Server read " + COUNTER.get() + " items from the socket.");
}
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String, User>map(MAP_NAME))
     .map(Map.Entry::getValue)
     .drainTo(AvroSinks.files(DIRECTORY_NAME, AvroSink::schemaForUser, User.class));
    return p;
}
/**
 * This will take the contents of the source map, map every entry to the key
 * {@code sum}, and write the entries into the sink map using a merge function
 * that adds the old value and the new value.
 */
private static Pipeline mapWithMerging(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .map(e -> entry("sum", e.getValue()))
            .drainTo(
                    Sinks.mapWithMerging(
                            sinkMapName,
                            (oldValue, newValue) -> oldValue + newValue
                    )
            );
    return pipeline;
}
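// A hypothetical driver for the pipeline above (map names are illustrative;
// jet is a running JetInstance as in the earlier sketches). With source
// values 0..9, the sink ends up with the single entry "sum" -> 45, because
// the first arriving entry is put as-is and each later one is merged in by
// addition.
IMap<Integer, Integer> mergeSource = jet.getMap("sourceMap");
for (int i = 0; i < 10; i++) {
    mergeSource.put(i, i);
}
jet.newJob(mapWithMerging("sourceMap", "sinkMap")).join();
System.out.println(jet.getMap("sinkMap").get("sum")); // -> 45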
/**
 * This will take the contents of the source map and apply an entry processor
 * to the sink map that increments the values by 5.
 */
private static Pipeline mapWithEntryProcessor(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .drainTo(
                    Sinks.mapWithEntryProcessor(
                            sinkMapName,
                            entryKey(),
                            item -> new IncrementEntryProcessor(5)
                    )
            );
    return pipeline;
}
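// IncrementEntryProcessor is defined elsewhere in the sample. A minimal
// sketch of what it might look like, built on Hazelcast's
// AbstractEntryProcessor (the null-handling detail is an assumption):
private static class IncrementEntryProcessor extends AbstractEntryProcessor<Integer, Integer> {
    private final int increment;

    IncrementEntryProcessor(int increment) {
        this.increment = increment;
    }

    @Override
    public Object process(Map.Entry<Integer, Integer> entry) {
        // Add the fixed increment to the entry's current value, treating a
        // missing value as 0.
        Integer value = entry.getValue();
        entry.setValue(value == null ? increment : value + increment);
        return null;
    }
}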
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.drawFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.drainTo(Sinks.map(MULTIPLE));
    return p;
}
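// HORSE_FROM_EVENT is defined elsewhere in the sample. A plausible sketch,
// assuming the events map is keyed by Long and its Event values expose a
// getHorse() accessor (the Event type, key type, and accessor name are
// assumptions for illustration):
static final DistributedFunction<Entry<Long, Event>, Horse> HORSE_FROM_EVENT =
        entry -> entry.getValue().getHorse();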
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
    final Pipeline pipeline = Pipeline.create();

    // Draw users from the Hazelcast IMDG source
    BatchStage<User> users =
            pipeline.drawFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

    // All bet legs which are single
    BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream(
            user.getKnownBets().stream()
                .filter(Bet::single)
                .flatMap(bet -> bet.getLegs().stream()
                                   .map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet))))
    );

    // Find for each race the projected loss if each horse was to win
    BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate(
            AggregateOperations.toMap(
                    Tuple3::f1,
                    t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win
                    (l, r) -> l + r
            )
    );

    // Write out: (r : (h : losses))
    betsByRace.drainTo(Sinks.map(WORST_ID));
    return pipeline;
}
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    JetInstance localJet = Jet.newJetInstance();
    try {
        HazelcastInstance externalHz = startExternalHazelcast();

        IMap<Integer, Integer> sourceMap = externalHz.getMap(MAP_1);
        for (int i = 0; i < ITEM_COUNT; i++) {
            sourceMap.put(i, i);
        }

        ClientConfig clientConfig = clientConfigForExternalHazelcast();

        // pipeline that copies the remote map to a local one with the same name
        Pipeline p1 = Pipeline.create();
        p1.drawFrom(Sources.remoteMap(MAP_1, clientConfig))
          .drainTo(Sinks.map(MAP_1));

        // pipeline that copies the local map to a remote one with a different name
        Pipeline p2 = Pipeline.create();
        p2.drawFrom(Sources.map(MAP_1))
          .drainTo(Sinks.remoteMap(MAP_2, clientConfig));

        localJet.newJob(p1).join();
        System.out.println("Local map-1 contents: " + localJet.getMap(MAP_1).entrySet());

        localJet.newJob(p2).join();
        System.out.println("Remote map-2 contents: " + externalHz.getMap(MAP_2).entrySet());
    } finally {
        Jet.shutdownAll();
        Hazelcast.shutdownAll();
    }
}
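// The clientConfigForExternalHazelcast() helper isn't shown in this excerpt.
// A sketch of what it might return; the group name, password, and address
// are illustrative and must match the external cluster's configuration:
private static ClientConfig clientConfigForExternalHazelcast() {
    ClientConfig clientConfig = new ClientConfig();
    // Identify the target cluster and where to reach it.
    clientConfig.getGroupConfig().setName("dev").setPassword("dev-pass");
    clientConfig.getNetworkConfig().addAddress("localhost:6701");
    return clientConfig;
}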