com.hazelcast.jet.pipeline.BatchStage.groupingKey Java code examples

/**
 * Attaches a stage that emits just the items that are distinct according
 * to their definition of equality ({@code equals} and {@code hashCode}).
 * There is no guarantee which one of equal items it will emit.
 * <p>
 * This is a convenience shortcut: it groups the items using the whole item
 * as the key and delegates to {@code distinct()} of the resulting keyed
 * stage.
 *
 * @return the newly attached stage
 */
@Nonnull
default BatchStage<T> distinct() {
  // The item itself is the grouping key, so "distinct by key" means "distinct by item"
  return groupingKey(wholeItem()).distinct();
}

/**
 * Assembles a pipeline that reads the entries of the map named
 * {@code sourceName}, splits every entry value into lower-cased words,
 * counts the occurrences of each word and writes the counts to the map
 * named {@code sinkName}.
 *
 * @param sourceName name of the map to read from
 * @param sinkName name of the map that receives the counts
 * @return the assembled pipeline
 */
public static Pipeline buildPipeline(String sourceName, String sinkName) {
  final Pipeline p = Pipeline.create();
  // Words are separated by runs of non-word characters
  final Pattern nonWord = Pattern.compile("\\W+");
  p.drawFrom(Sources.<Integer, String>map(sourceName))
   .flatMap(entry -> {
     String[] tokens = nonWord.split(entry.getValue().toLowerCase());
     // Splitting can produce empty tokens; they are dropped here
     return Traversers.traverseArray(tokens).filter(token -> !token.isEmpty());
   })
   .groupingKey(wholeItem())
   .aggregate(counting())
   .drainTo(Sinks.map(sinkName));
  return p;
}

/**
 * Assembles the word-count pipeline: reads the entries of the
 * {@code BOOK_LINES} map, breaks every entry value into lower-cased words
 * and stores the number of occurrences of each word in the {@code COUNTS}
 * map.
 *
 * @return the assembled pipeline
 */
private static Pipeline buildPipeline() {
  final Pipeline pipeline = Pipeline.create();
  // Words are separated by runs of non-word characters
  final Pattern wordBreak = Pattern.compile("\\W+");
  pipeline.drawFrom(Sources.<Long, String>map(BOOK_LINES))
          .flatMap(line -> {
            String lowerCased = line.getValue().toLowerCase();
            return traverseArray(wordBreak.split(lowerCased));
          })
          // Splitting can produce empty tokens; drop them before counting
          .filter(token -> !token.isEmpty())
          .groupingKey(wholeItem())
          .aggregate(counting())
          .drainTo(Sinks.map(COUNTS));
  return pipeline;
}

// Word count over an HDFS source. NOTE(review): `p`, `delimiter` and `jobConfig`
// are declared outside this excerpt -- presumably a Pipeline, a compiled Pattern
// and the Hadoop job configuration; confirm against the enclosing method.
// Each (key, value) record is reduced to the string form of its value.
p.drawFrom(HdfsSources.hdfs(jobConfig, (k, v) -> v.toString()))
 // Lower-case the line, split it on the delimiter and drop empty tokens
 .flatMap(line -> traverseArray(delimiter.split(line.toLowerCase())).filter(w -> !w.isEmpty()))
 // The word itself is the grouping key
 .groupingKey(wholeItem())
 .aggregate(counting())
 // The sink is built from the same jobConfig as the source
 .drainTo(HdfsSinks.hdfs(jobConfig));

/**
 * Creates the Pipeline that carries out the actual computation.
 * <p>
 * The probability of word B following word A depends on how many pairs
 * start with A and how many of those end with B. The pipeline therefore
 * turns every input line into pairs of consecutive words, groups the pairs
 * by their first word and aggregates each group with
 * {@code buildAggregateOp()}. The result is written to the
 * {@code "stateTransitions"} map.
 *
 * @return the assembled pipeline
 */
private static Pipeline buildPipeline() {
  final Pipeline pipeline = Pipeline.create();
  // Two tokens separated by a single whitespace character; a token is a word or a full stop
  final Pattern adjacentPair = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
  // Source: the input files, read line by line
  BatchStage<String> textLines = pipeline.drawFrom(Sources.<String>files(INPUT_FILE));
  // Every regex match on the lower-cased line becomes a (first, second) pair
  BatchStage<Tuple2<String, String>> wordPairs = textLines.flatMap(line ->
      traverseMatcher(adjacentPair.matcher(line.toLowerCase()),
          match -> tuple2(match.group(1), match.group(2))));
  // Group by the first word of the pair and aggregate into an IMap
  wordPairs.groupingKey(Tuple2::f0)
           .aggregate(buildAggregateOp())
           .drainTo(Sinks.map("stateTransitions"));
  return pipeline;
}

/**
 * Assembles the job's pipeline: reads {@code Horse} items from the
 * {@code EVENTS_BY_NAME} map, counts how often each horse occurs and
 * writes the horses that occur more than once to the {@code MULTIPLE} map.
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
  final Pipeline pipeline = Pipeline.create();
  // Read every entry of the source map (the predicate accepts all), projected to a Horse
  final BatchStage<Horse> horses =
      pipeline.drawFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));
  horses.groupingKey(wholeItem())
        .aggregate(counting())
        // Keep only the horses counted more than once
        .filter(tally -> tally.getValue() > 1)
        .drainTo(Sinks.map(MULTIPLE));
  return pipeline;
}

/**
 * Builds a pipeline that co-groups page visits, add-to-cart events and
 * payments by user ID with a single {@code aggregate3} call and stores the
 * per-user triple of lists in the {@code RESULT} map.
 *
 * @return the assembled pipeline
 */
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236
private static Pipeline coGroupDirect() {
  final Pipeline pipeline = Pipeline.create();

  // Three list sources, each keyed by the user ID of its items. The key
  // extractors deliberately stay lambdas (see the suppressed warning above).
  BatchStage<PageVisit> visitItems = pipeline.drawFrom(Sources.<PageVisit>list(PAGE_VISIT));
  BatchStageWithKey<PageVisit, Integer> keyedVisits = visitItems.groupingKey(visit -> visit.userId());

  BatchStage<AddToCart> cartItems = pipeline.drawFrom(Sources.<AddToCart>list(ADD_TO_CART));
  BatchStageWithKey<AddToCart, Integer> keyedCarts = cartItems.groupingKey(cart -> cart.userId());

  BatchStage<Payment> paymentItems = pipeline.drawFrom(Sources.<Payment>list(PAYMENT));
  BatchStageWithKey<Payment, Integer> keyedPayments = paymentItems.groupingKey(pay -> pay.userId());

  // Co-group the three keyed stages; each stream is collected into a list
  // and the three lists are combined into a 3-tuple per key.
  BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> perUser =
      keyedVisits.aggregate3(toList(), keyedCarts, toList(), keyedPayments, toList());

  // Write the co-grouped entries to the output map
  perUser.drainTo(Sinks.map(RESULT));
  return pipeline;
}

/**
 * Builds a pipeline that co-groups page visits, add-to-cart events and
 * payments by user ID with a single {@code aggregate3} call and stores the
 * per-user triple of lists in the {@code RESULT} map.
 *
 * @return the assembled pipeline
 */
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236
private static Pipeline coGroupDirect() {
  final Pipeline pipeline = Pipeline.create();

  // Three list sources, each keyed by the user ID of its items. The key
  // extractors deliberately stay lambdas (see the suppressed warning above).
  BatchStage<PageVisit> visitItems = pipeline.drawFrom(Sources.<PageVisit>list(PAGE_VISIT));
  BatchStageWithKey<PageVisit, Integer> keyedVisits = visitItems.groupingKey(visit -> visit.userId());

  BatchStage<AddToCart> cartItems = pipeline.drawFrom(Sources.<AddToCart>list(ADD_TO_CART));
  BatchStageWithKey<AddToCart, Integer> keyedCarts = cartItems.groupingKey(cart -> cart.userId());

  BatchStage<Payment> paymentItems = pipeline.drawFrom(Sources.<Payment>list(PAYMENT));
  BatchStageWithKey<Payment, Integer> keyedPayments = paymentItems.groupingKey(pay -> pay.userId());

  // Co-group the three keyed stages; each stream is collected into a list
  // and the three lists are combined into a 3-tuple per key.
  BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> perUser =
      keyedVisits.aggregate3(toList(), keyedCarts, toList(), keyedPayments, toList());

  // Write the co-grouped entries to the output map
  perUser.drainTo(Sinks.map(RESULT));
  return pipeline;
}

          return stopwords.contains(word) ? null : entry(entry.getKey(), word);
        }))
.groupingKey(entryValue()) // entry value is the word
.aggregate(AggregateOperations.toMap(entryKey(), e -> 1L, Long::sum));

/**
 * Assembles the pipeline for the real-time analysis: reads the users from
 * the {@code USER_ID} map, expands their single bets into one
 * (race, backed horse, bet) triple per leg and, for every race, sums the
 * projected payout per horse. The result is written to the
 * {@code WORST_ID} map.
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
  final Pipeline p = Pipeline.create();

  // Source: the values of the user map (the predicate accepts every entry)
  BatchStage<User> allUsers =
      p.drawFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

  // One (race, backed horse, bet) triple for every leg of every single bet
  BatchStage<Tuple3<Race, Horse, Bet>> singleBetLegs = allUsers.flatMap(u ->
      traverseStream(u.getKnownBets()
                      .stream()
                      .filter(Bet::single)
                      .flatMap(b -> b.getLegs()
                                     .stream()
                                     .map(l -> tuple3(l.getRace(), l.getBacking(), b)))));

  // Per race: horse -> summed projected payout should that horse win
  BatchStage<Entry<Race, Map<Horse, Double>>> payoutsByRace =
      singleBetLegs.groupingKey(Tuple3::f0)
                   .aggregate(AggregateOperations.toMap(
                       Tuple3::f1,
                       leg -> leg.f2().projectedPayout(leg.f1()),
                       (sum, payout) -> sum + payout));

  // Output shape: (race : (horse : losses))
  payoutsByRace.drainTo(Sinks.map(WORST_ID));
  return p;
}

/**
 * Builds a pipeline that co-groups page visits, add-to-cart events and
 * payments by user ID through a {@code GroupAggregateBuilder} and stores
 * the per-user triple of lists in the {@code RESULT} map.
 *
 * @return the assembled pipeline
 */
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236
private static Pipeline coGroupBuild() {
  final Pipeline pipeline = Pipeline.create();

  // Three list sources, each keyed by the user ID of its items. The key
  // extractors deliberately stay lambdas (see the suppressed warning above).
  BatchStage<PageVisit> visitItems = pipeline.drawFrom(Sources.<PageVisit>list(PAGE_VISIT));
  BatchStageWithKey<PageVisit, Integer> keyedVisits = visitItems.groupingKey(visit -> visit.userId());

  BatchStage<AddToCart> cartItems = pipeline.drawFrom(Sources.<AddToCart>list(ADD_TO_CART));
  BatchStageWithKey<AddToCart, Integer> keyedCarts = cartItems.groupingKey(cart -> cart.userId());

  BatchStage<Payment> paymentItems = pipeline.drawFrom(Sources.<Payment>list(PAYMENT));
  BatchStageWithKey<Payment, Integer> keyedPayments = paymentItems.groupingKey(pay -> pay.userId());

  // The builder starts from the page-visit stage; tag0 refers to its result
  GroupAggregateBuilder<Integer, List<PageVisit>> coGroup = keyedVisits.aggregateBuilder(toList());
  Tag<List<PageVisit>> visitsTag = coGroup.tag0();
  // Every further stage added to the builder hands back the tag used to
  // look up its result. Two are added here; more could follow.
  Tag<List<AddToCart>> cartsTag = coGroup.add(keyedCarts, toList());
  Tag<List<Payment>> paymentsTag = coGroup.add(keyedPayments, toList());

  // Building yields the per-key results addressed by tag; they are repacked
  // into an entry of key -> 3-tuple of lists.
  BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> perUser =
      coGroup.build((userKey, byTag) -> entry(
          userKey,
          tuple3(byTag.get(visitsTag), byTag.get(cartsTag), byTag.get(paymentsTag))));

  // Write the co-grouped entries to the output map
  perUser.drainTo(Sinks.map(RESULT));
  return pipeline;
}

/**
 * Builds a pipeline that co-groups page visits, add-to-cart events and
 * payments by user ID through a {@code GroupAggregateBuilder} and stores
 * the per-user triple of lists in the {@code RESULT} map.
 *
 * @return the assembled pipeline
 */
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236
private static Pipeline coGroupBuild() {
  final Pipeline pipeline = Pipeline.create();

  // Three list sources, each keyed by the user ID of its items. The key
  // extractors deliberately stay lambdas (see the suppressed warning above).
  BatchStage<PageVisit> visitItems = pipeline.drawFrom(Sources.<PageVisit>list(PAGE_VISIT));
  BatchStageWithKey<PageVisit, Integer> keyedVisits = visitItems.groupingKey(visit -> visit.userId());

  BatchStage<AddToCart> cartItems = pipeline.drawFrom(Sources.<AddToCart>list(ADD_TO_CART));
  BatchStageWithKey<AddToCart, Integer> keyedCarts = cartItems.groupingKey(cart -> cart.userId());

  BatchStage<Payment> paymentItems = pipeline.drawFrom(Sources.<Payment>list(PAYMENT));
  BatchStageWithKey<Payment, Integer> keyedPayments = paymentItems.groupingKey(pay -> pay.userId());

  // The builder starts from the page-visit stage; tag0 refers to its result
  GroupAggregateBuilder<Integer, List<PageVisit>> coGroup = keyedVisits.aggregateBuilder(toList());
  Tag<List<PageVisit>> visitsTag = coGroup.tag0();
  // Every further stage added to the builder hands back the tag used to
  // look up its result. Two are added here; more could follow.
  Tag<List<AddToCart>> cartsTag = coGroup.add(keyedCarts, toList());
  Tag<List<Payment>> paymentsTag = coGroup.add(keyedPayments, toList());

  // Building yields the per-key results addressed by tag; they are repacked
  // into an entry of key -> 3-tuple of lists.
  BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> perUser =
      coGroup.build((userKey, byTag) -> entry(
          userKey,
          tuple3(byTag.get(visitsTag), byTag.get(cartsTag), byTag.get(paymentsTag))));

  // Write the co-grouped entries to the output map
  perUser.drainTo(Sinks.map(RESULT));
  return pipeline;
}

/**
 * Assembles a pipeline that reads the files in {@code sourceDir}, parses
 * every line into a {@code LogLine}, keeps the lines whose response code
 * is in the range [200, 400), expands each of them with
 * {@code AccessLogAnalyzer.explodeSubPaths}, counts the occurrences of
 * every resulting item and writes the counts to files in {@code targetDir}.
 *
 * @param sourceDir directory holding the input files
 * @param targetDir directory that receives the output files
 * @return the assembled pipeline
 */
private static Pipeline buildPipeline(String sourceDir, String targetDir) {
  final Pipeline pipeline = Pipeline.create();
  pipeline.drawFrom(Sources.files(sourceDir))
          .map(LogLine::parse)
          // Keep response codes from 200 (inclusive) to 400 (exclusive)
          .filter((LogLine entry) -> 200 <= entry.getResponseCode() && entry.getResponseCode() < 400)
          .flatMap(AccessLogAnalyzer::explodeSubPaths)
          // The exploded item itself is the grouping key
          .groupingKey(wholeItem())
          .aggregate(counting())
          .drainTo(Sinks.files(targetDir));
  return pipeline;
}

Popular methods of BatchStage

drainTo
flatMap
filter
map
addTimestamps
aggregate
Attaches a stage that performs the given aggregate operation over all the items it receives. The agg
aggregate2
aggregate3
hashJoin
peek

Popular in Java

Running tasks concurrently on multiple threads
findViewById (Activity)
getSharedPreferences (Context)
onRequestPermissionsResult (Fragment)
ObjectMapper (com.fasterxml.jackson.databind)
ObjectMapper provides functionality for reading and writing JSON, either to and from basic POJOs (Pl
HttpServer (com.sun.net.httpserver)
This class implements a simple HTTP server. A HttpServer is bound to an IP address and port number a
BufferedReader (java.io)
Wraps an existing Reader and buffers the input. Expensive interaction with the underlying reader is
Selector (java.nio.channels)
A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
Timer (java.util)
Timers schedule one-shot or recurring TimerTask for execution. Prefer java.util.concurrent.Scheduled
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
Top Vim plugins

How to use the groupingKey method in com.hazelcast.jet.pipeline.BatchStage

Best Java code snippets using com.hazelcast.jet.pipeline.BatchStage.groupingKey (Showing top 13 results out of 315)

How to use the groupingKey method in com.hazelcast.jet.pipeline.BatchStage