private Stream newDRPCStream(DRPCSpout spout) {
    // TODO: consider adding a shuffle grouping after the spout to avoid so much
    // routing of the args/return-info all over the place
    // (at least until it's possible to just pack bolt logic into the spout itself)
    Node n = new SpoutNode(getUniqueStreamId(),
                           TridentUtils.getSingleOutputStreamFields(spout),
                           null,
                           spout,
                           SpoutNode.SpoutType.DRPC);
    Stream nextStream = addNode(n);
    // later on, this will be joined back with return-info and all the results
    return nextStream.project(new Fields("args"));
}
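This is the internal helper behind the public newDRPCStream overloads. For orientation, here is a minimal usage sketch of the public API in local mode; the "words" function name, the Split function, and the test sentence are illustrative assumptions, not taken from the snippet above:

// Hypothetical word-count DRPC stream built against a LocalDRPC instance.
LocalDRPC drpc = new LocalDRPC();
TridentTopology topology = new TridentTopology();
topology.newDRPCStream("words", drpc)
        .each(new Fields("args"), new Split(), new Fields("word"))
        .groupBy(new Fields("word"))
        .aggregate(new Fields("word"), new Count(), new Fields("count"));
// The DRPC spout emits ["args", "return-info"]; the helper above projects the
// caller-visible stream down to "args", and results are joined back with
// "return-info" when the topology is completed. Clients invoke by function name:
String result = drpc.execute("words", "cat dog the dog bird");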
private Stream window(WindowConfig windowConfig, WindowsStoreFactory windowStoreFactory,
                      Fields inputFields, Aggregator aggregator, Fields functionFields,
                      boolean storeTuplesInStore) {
    projectionValidation(inputFields);
    windowConfig.validate();

    Fields fields = addTriggerField(functionFields);

    // when storeTuplesInStore is false, the given windowStoreFactory is only used to store
    // triggers, and that store is passed to WindowStateUpdater to remove them after the
    // batch is committed.
    Stream stream = _topology.addSourcedNode(this,
            new ProcessorNode(_topology.getUniqueStreamId(),
                    _name,
                    fields,
                    fields,
                    new WindowTridentProcessor(windowConfig, _topology.getUniqueWindowId(),
                            windowStoreFactory, inputFields, aggregator, storeTuplesInStore)));

    Stream effectiveStream = stream.project(functionFields);

    // create a StateUpdater with the given windowStoreFactory to remove triggered aggregation
    // results from the store once they are successfully processed.
    StateFactory stateFactory = new WindowsStateFactory();
    StateUpdater stateUpdater = new WindowsStateUpdater(windowStoreFactory);
    stream.partitionPersist(stateFactory, new Fields(WindowTridentProcessor.TRIGGER_FIELD_NAME),
            stateUpdater, new Fields());

    return effectiveStream;
}
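This private helper backs the public windowing overloads on Stream. A hedged sketch of a call through that public API follows; the window sizes, the in-memory store, and the field names are assumptions for illustration:

// Illustrative sliding-count window: aggregate the last 100 tuples, emitting every 10.
WindowsStoreFactory storeFactory = new InMemoryWindowsStoreFactory(); // triggers (and optionally tuples) live here
stream.window(SlidingCountWindow.of(100, 10),
        storeFactory,
        new Fields("word"),            // input fields handed to the aggregator
        new CountAsAggregator(),
        new Fields("count"));          // fields emitted on each trigger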
private static void completeDRPC(DefaultDirectedGraph<Node, IndexedEdge> graph,
                                 Map<String, List<Node>> colocate, UniqueIdGen gen) {
    List<Set<Node>> connectedComponents = new ConnectivityInspector<>(graph).connectedSets();

    for (Set<Node> g : connectedComponents) {
        checkValidJoins(g);
    }

    TridentTopology helper = new TridentTopology(graph, colocate, gen);
    for (Set<Node> g : connectedComponents) {
        SpoutNode drpcNode = getDRPCSpoutNode(g);
        if (drpcNode != null) {
            Stream lastStream = new Stream(helper, null, getLastAddedNode(g));
            Stream s = new Stream(helper, null, drpcNode);
            helper.multiReduce(
                    s.project(new Fields("return-info")).batchGlobal(),
                    lastStream.batchGlobal(),
                    new ReturnResultsReducer(),
                    new Fields());
        }
    }
}
.newStream("spout", spout) .each(new Fields("shortid", "date"), new DatePartitionFunction(), new Fields("cf", "cq")).project(new Fields("shortid", "cf", "cq")) .groupBy(new Fields("shortid", "cf", "cq")) .persistentAggregate(state, new Count(), new Fields("count"));
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();

    /**
     * First, we need a stream of tweets from which we can parse and extract
     * only the text and its id. As you will notice, we're going to store the stream
     * using the {@link ElasticSearchState} implementation and its {@link StateUpdater}.
     * Check their implementations for details.
     */
    topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .each(new Fields("text", "content"), new TweetIdExtractor(), new Fields("tweetId"))
            .project(new Fields("tweetId", "text"))
            .each(new Fields("tweetId", "text"), new Print())
            .partitionPersist(new ElasticSearchStateFactory(), new Fields("tweetId", "text"),
                    new ElasticSearchStateUpdater());

    /**
     * Now we need a DRPC stream to query the state where the tweets are stored.
     * To do that, as shown below, we need an implementation of {@link QueryFunction} to
     * access our {@link ElasticSearchState}.
     */
    TridentState elasticSearchState = topology.newStaticState(new ElasticSearchStateFactory());
    topology
            .newDRPCStream("search")
            .each(new Fields("args"), new Split(" "), new Fields("keywords")) // let's split the arguments
            .stateQuery(elasticSearchState, new Fields("keywords"), new TweetQuery(), new Fields("ids")) // and pass them as query parameters
            .project(new Fields("ids"));

    return topology.build();
}
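To exercise the "search" function above from a client, something like the following sketch could be used; the host, port, and query string are placeholders, and the two-argument DRPCClient constructor matches older Storm releases:

// Hypothetical client-side call to the "search" DRPC function declared above.
DRPCClient client = new DRPCClient("drpc.server.local", 3772); // placeholder host/port
String ids = client.execute("search", "storm trident");        // whitespace-separated keywords
System.out.println(ids);                                       // matching tweet ids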
.each(new Fields("content"), new TweetIdExtractor(), new Fields("tweetId")) .each(new Fields("content"), new GetContentName(), new Fields("hashtag")) .project(new Fields("hashtag", "tweetId")); .each(new Fields("content"), new TweetIdExtractor(), new Fields("tweetId")) .each(new Fields("content"), new GetContentName(), new Fields("url")) .project(new Fields("url", "tweetId"));
.each(new Fields("hashtag", "resultrt", "resultbatch"), new LambdaMerge(), new Fields("result")) .project(new Fields("result"));
        new ToHourBucket(), new Fields("bucket"))
        .project(new Fields("normurl", "bucket"));

stream.groupBy(new Fields("normurl", "bucket"))
      .persistentAggregate(
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .parallelismHint(3)
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new HazelCastStateFactory(), new Count(), new Fields("count"))
            .parallelismHint(3);

    topology
            .newDRPCStream("hashtag_count")
            .each(new Constants<String>("< 100", "< 10K", "< 100K", ">= 100K"), new Fields("followerClass"))
            .stateQuery(count, new Fields("followerClass", "args"), new MapGet(), new Fields("count"));

    return topology.build();
}
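Note how Constants fans the single DRPC tuple out into one tuple per follower-class bucket before the MapGet lookup. Querying it might look like this sketch, assuming a LocalDRPC handle was passed to newDRPCStream for testing:

// Hypothetical invocation: "obama" becomes the "args" field, and one count is
// looked up for each of the four follower-class buckets emitted by Constants.
String counts = drpc.execute("hashtag_count", "obama");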
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("top_hashtags")
            .stateQuery(count, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(count, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(5, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("text", "content", "user"))
            .project(new Fields("content", "user"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("user"), new OnlyEnglish())
            .each(new Fields("content", "user"), new ExtractFollowerClassAndContentName(),
                    new Fields("followerClass", "contentName"))
            .groupBy(new Fields("followerClass", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("hashtag_count")
            .stateQuery(count, new TupleCollectionGet(), new Fields("followerClass", "contentName"))
            .stateQuery(count, new Fields("followerClass", "contentName"), new MapGet(), new Fields("count"))
            .groupBy(new Fields("followerClass"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(1, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
public static StormTopology buildTopology(TransactionalTridentKafkaSpout spout) throws IOException {
    TridentTopology topology = new TridentTopology();
    TridentState count = topology
            .newStream("tweets", spout)
            .each(new Fields("str"), new ParseTweet(), new Fields("status", "content", "user"))
            .project(new Fields("content", "user", "status"))
            .each(new Fields("content"), new OnlyHashtags())
            .each(new Fields("status"), new OnlyGeo())
            .each(new Fields("status", "content"), new ExtractLocation(),
                    new Fields("country", "contentName"))
            .groupBy(new Fields("country", "contentName"))
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

    topology
            .newDRPCStream("location_hashtag_count")
            .stateQuery(count, new TupleCollectionGet(), new Fields("country", "contentName"))
            .stateQuery(count, new Fields("country", "contentName"), new MapGet(), new Fields("count"))
            .groupBy(new Fields("country"))
            .aggregate(new Fields("contentName", "count"),
                    new FirstN.FirstNSortedAgg(3, "count", true),
                    new Fields("contentName", "count"));

    return topology.build();
}
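The last three examples share one pattern: the first stateQuery with TupleCollectionGet enumerates every key currently held in the MapState, the second stateQuery fetches each key's count with MapGet, and FirstN.FirstNSortedAgg keeps the top results (per group, when a groupBy precedes it). Since this pipeline ignores the DRPC argument, an invocation sketch, assuming a LocalDRPC handle was wired in for testing, is simply:

// Hypothetical invocation; the argument is unused by this pipeline.
String topPerCountry = drpc.execute("location_hashtag_count", "");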
.chainEnd()
.each(new Fields("sum", "count"), new DivideAsDouble(), new Fields("avg"))
.project(new Fields("location", "count", "avg"))
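The fragment above closes a chained aggregation. A hedged reconstruction of the fuller pattern follows; the grouping field comes from the fragment, while the "amount" input field and the Sum/Count pair are assumptions chosen to make the sum/count division meaningful:

// Hypothetical surrounding chain: sum and count are computed in one pass over
// each "location" group, then combined into an average.
stream.groupBy(new Fields("location"))
      .chainedAgg()
      .aggregate(new Fields("amount"), new Sum(), new Fields("sum")) // "amount" is an assumed input field
      .aggregate(new Count(), new Fields("count"))
      .chainEnd()
      .each(new Fields("sum", "count"), new DivideAsDouble(), new Fields("avg"))
      .project(new Fields("location", "count", "avg"));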