private Stream newDRPCStream(DRPCSpout spout) {
    // TODO: consider adding a shuffle grouping after the spout to avoid so much
    // routing of the args/return-info all over the place
    // (at least until it's possible to just pack bolt logic into the spout itself)
    Node n = new SpoutNode(getUniqueStreamId(),
                           TridentUtils.getSingleOutputStreamFields(spout),
                           null,
                           spout,
                           SpoutNode.SpoutType.DRPC);
    Stream nextStream = addNode(n);
    // later on, this will be joined back with return-info and all the results
    return nextStream.project(new Fields("args"));
}
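This is the internal helper behind the public newDRPCStream overloads. For orientation, here is a minimal usage sketch of the public API in local mode; the "words" function name, the Split function, and the test sentence are illustrative assumptions, not taken from the snippet above:

// Hypothetical word-count DRPC stream built against a LocalDRPC instance.
LocalDRPC drpc = new LocalDRPC();
TridentTopology topology = new TridentTopology();
topology.newDRPCStream("words", drpc)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .aggregate(new Fields("word"), new Count(), new Fields("count"));
// The DRPC spout emits ["args", "return-info"]; the helper above projects the
// caller-visible stream down to "args", and results are joined back with
// "return-info" when the topology is completed. Clients invoke by function name:
String result = drpc.execute("words", "cat dog the dog bird");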
private Stream window(WindowConfig windowConfig, WindowsStoreFactory windowStoreFactory,
                      Fields inputFields, Aggregator aggregator, Fields functionFields,
                      boolean storeTuplesInStore) {
    projectionValidation(inputFields);
    windowConfig.validate();

    Fields fields = addTriggerField(functionFields);

    // when storeTuplesInStore is false, the given windowStoreFactory is only used to store
    // triggers, and that store is passed to WindowStateUpdater to remove them after the
    // batch is committed.
    Stream stream = _topology.addSourcedNode(this,
            new ProcessorNode(_topology.getUniqueStreamId(),
                    _name,
                    fields,
                    fields,
                    new WindowTridentProcessor(windowConfig, _topology.getUniqueWindowId(),
                            windowStoreFactory, inputFields, aggregator, storeTuplesInStore)));

    Stream effectiveStream = stream.project(functionFields);

    // create a StateUpdater with the given windowStoreFactory to remove triggered aggregation
    // results from the store once they are successfully processed.
    StateFactory stateFactory = new WindowsStateFactory();
    StateUpdater stateUpdater = new WindowsStateUpdater(windowStoreFactory);
    stream.partitionPersist(stateFactory, new Fields(WindowTridentProcessor.TRIGGER_FIELD_NAME),
            stateUpdater, new Fields());

    return effectiveStream;
}
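This private helper backs the public windowing overloads on Stream. A hedged sketch of a call through that public API follows; the window sizes, the in-memory store, and the field names are assumptions for illustration:

// Illustrative sliding-count window: aggregate the last 100 tuples, emitting every 10.
WindowsStoreFactory storeFactory = new InMemoryWindowsStoreFactory(); // triggers (and optionally tuples) live here
stream.window(SlidingCountWindow.of(100, 10),
        storeFactory,
        new Fields("word"),            // input fields handed to the aggregator
        new CountAsAggregator(),
        new Fields("count"));          // fields emitted on each trigger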
private static void completeDRPC(DefaultDirectedGraph<Node, IndexedEdge> graph,
                                 Map<String, List<Node>> colocate, UniqueIdGen gen) {
    List<Set<Node>> connectedComponents = new ConnectivityInspector<>(graph).connectedSets();

    for (Set<Node> g : connectedComponents) {
        checkValidJoins(g);
    }

    TridentTopology helper = new TridentTopology(graph, colocate, gen);
    for (Set<Node> g : connectedComponents) {
        SpoutNode drpcNode = getDRPCSpoutNode(g);
        if (drpcNode != null) {
            Stream lastStream = new Stream(helper, null, getLastAddedNode(g));
            Stream s = new Stream(helper, null, drpcNode);
            helper.multiReduce(
                    s.project(new Fields("return-info")).batchGlobal(),
                    lastStream.batchGlobal(),
                    new ReturnResultsReducer(),
                    new Fields());
        }
    }
}
.newStream("spout", spout) .each(new Fields("shortid", "date"), new DatePartitionFunction(), new Fields("cf", "cq")).project(new Fields("shortid", "cf", "cq")) .groupBy(new Fields("shortid", "cf", "cq")) .persistentAggregate(state, new Count(), new Fields("count"));
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    /**
     * First, we need a stream of tweets from which we can parse and extract
     * only the text and its id. As you will notice, we're going to store the stream
     * using the {@link ElasticSearchState} implementation and its {@link StateUpdater}.
     * Check their implementations for details.
     */
    topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .each(new Fields("text", "content"), new TweetIdExtractor(), new Fields("tweetId"))
            .project(new Fields("tweetId", "text"))
            .each(new Fields("tweetId", "text"), new Print())
            .partitionPersist(new ElasticSearchStateFactory(), new Fields("tweetId", "text"),
                    new ElasticSearchStateUpdater());

    /**
     * Now we need a DRPC stream to query the state where the tweets are stored.
     * To do that, as shown below, we need an implementation of {@link QueryFunction} to
     * access our {@link ElasticSearchState}.
     */
    TridentState elasticSearchState = topology.newStaticState(new ElasticSearchStateFactory());
    topology
            .newDRPCStream("search")
            .each(new Fields("args"), new Split(" "), new Fields("keywords")) // let's split the arguments
            .stateQuery(elasticSearchState, new Fields("keywords"), new TweetQuery(), new Fields("ids")) // and pass them as query parameters
            .project(new Fields("ids"));

    return topology.build();
}
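To exercise the "search" function above from a client, something like the following sketch could be used; the host, port, and query string are placeholders, and the two-argument DRPCClient constructor matches older Storm releases:

// Hypothetical client-side call to the "search" DRPC function declared above.
DRPCClient client = new DRPCClient("drpc.server.local", 3772); // placeholder host/port
String ids = client.execute("search", "storm trident");        // whitespace-separated keywords
System.out.println(ids);                                       // matching tweet ids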
.each(new Fields("content"), new TweetIdExtractor(), new Fields("tweetId")) .each(new Fields("content"), new GetContentName(), new Fields("hashtag")) .project(new Fields("hashtag", "tweetId")); .each(new Fields("content"), new TweetIdExtractor(), new Fields("tweetId")) .each(new Fields("content"), new GetContentName(), new Fields("url")) .project(new Fields("url", "tweetId"));
.each(new Fields("hashtag", "resultrt", "resultbatch"), new LambdaMerge(), new Fields("result")) .project(new Fields("result"));
        new ToHourBucket(), new Fields("bucket"))
        .project(new Fields("normurl", "bucket"));

stream.groupBy(new Fields("normurl", "bucket"))
      .persistentAggregate(
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .parallelismHint(3)
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
            .parallelismHint(3);

    topology
            .newDRPCStream("hashtag_count")
            .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
            .stateQuery(count, new Fields("followerClass", "args"), new MapGet(), new Fields("count"));

    return topology.build();
}
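Note how Constants fans the single DRPC tuple out into one tuple per follower-class bucket before the MapGet lookup. Querying it might look like this sketch, assuming a LocalDRPC handle was passed to newDRPCStream for testing:

// Hypothetical invocation: "obama" becomes the "args" field, and one count is
// looked up for each of the four follower-class buckets emitted by Constants.
String counts = drpc.execute("hashtag_count", "obama");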
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("top_hashtags")
            .stateQuery(count, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(count, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(5, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("hashtag_count")
            .stateQuery(count, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(count, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
            .groupBy(new Fields("followerClass"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(1, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("status", "content", "user"))
            .project(new Fields("content", "user", "status"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("status"), new OnlyGeo())
            .each(new Fields("status", "content"), new ExtractLocation(),
                    new Fields("country", "contentName"))
            .groupBy(new Fields("country", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("location_hashtag_count")
            .stateQuery(count, new TupleCollectionGet(), new Fields("country", "contentName"))
            .stateQuery(count, new Fields("country", "contentName"), new MapGet(), new Fields("count"))
            .groupBy(new Fields("country"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(3, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
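The last three examples share one pattern: the first stateQuery with TupleCollectionGet enumerates every key currently held in the MapState, the second stateQuery fetches each key's count with MapGet, and FirstN.FirstNSortedAgg keeps the top results (per group, when a groupBy precedes it). Since this pipeline ignores the DRPC argument, an invocation sketch, assuming a LocalDRPC handle was wired in for testing, is simply:

// Hypothetical invocation; the argument is unused by this pipeline.
String topPerCountry = drpc.execute("location_hashtag_count", "");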
.chainEnd()
.each(new Fields("sum", "count"), new DivideAsDouble(), new Fields("avg"))
.project(new Fields("location", "count", "avg"))
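The fragment above closes a chained aggregation. A hedged reconstruction of the fuller pattern follows; the grouping field comes from the fragment, while the "amount" input field and the Sum/Count pair are assumptions chosen to make the sum/count division meaningful:

// Hypothetical surrounding chain: sum and count are computed in one pass over
// each "location" group, then combined into an average.
stream.groupBy(new Fields("location"))
      .chainedAgg()
      .aggregate(new Fields("amount"), new Sum(), new Fields("sum")) // "amount" is an assumed input field
      .aggregate(new Count(), new Fields("count"))
      .chainEnd()
      .each(new Fields("sum", "count"), new DivideAsDouble(), new Fields("avg"))
      .project(new Fields("location", "count", "avg"));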