/**
 * Builds an updated model from the combined new and historical data and
 * publishes it as JSON on the model-update topic.
 *
 * @param sparkContext     active Spark context (unused directly here)
 * @param timestamp        batch timestamp (unused directly here)
 * @param newData          pairs from the current batch
 * @param pastData         pairs from prior batches; may be {@code null} on the first run
 * @param modelDirString   model directory path (unused directly here)
 * @param modelUpdateTopic producer the serialized model is sent to under key "MODEL"
 * @throws IOException if JSON serialization of the model fails
 */
@Override
public void runUpdate(JavaSparkContext sparkContext,
                      long timestamp,
                      JavaPairRDD<String,String> newData,
                      JavaPairRDD<String,String> pastData,
                      String modelDirString,
                      TopicProducer<String,String> modelUpdateTopic) throws IOException {
  // Fold historical data in only when it exists; pastData is null on the very first update.
  JavaPairRDD<String,String> combinedData = (pastData == null) ? newData : newData.union(pastData);
  String serializedModel;
  try {
    serializedModel = new ObjectMapper().writeValueAsString(countDistinctOtherWords(combinedData));
  } catch (JsonProcessingException jpe) {
    // Re-wrap so callers only ever see the declared IOException; cause is preserved.
    throw new IOException(jpe);
  }
  modelUpdateTopic.send("MODEL", serializedModel);
}
// Fragment of a larger method (sc, rdd, prevRDD, finalRDD are defined above this excerpt).
// Before each union, the Spark call site is tagged with a descriptive label so the
// union stage is readable in the Spark UI; each result RDD is then renamed to show
// its partition count.
sc.setCallSite(CallSite.apply("UnionRDD (" + rdd.name() + ", " + prevRDD.name() + ")", ""));
rdd = rdd.union(prevRDD);
rdd.setName("UnionRDD (" + rdd.getNumPartitions() + ")");
sc.setCallSite(CallSite.apply("UnionRDD (" + rdd.name() + ", " + finalRDD.name() + ")", ""));
finalRDD = finalRDD.union(rdd);
finalRDD.setName("UnionRDD (" + finalRDD.getNumPartitions() + ")");
// Concatenates transaction and user pair RDDs into one RDD (duplicates retained);
// presumably both carry the same key type for a later key-based operation — TODO confirm.
JavaPairRDD<String,Tuple2<String,String>> allRDD = transactionsRDD.union(usersRDD);
// Concatenates transaction and user pair RDDs into one RDD (duplicates retained);
// presumably both carry the same key type for a later key-based operation — TODO confirm.
JavaPairRDD<String,Tuple2<String,String>> allRDD = transactionsRDD.union(usersRDD);
/**
 * Concatenates two RDDs into one larger RDD containing every pair from both
 * inputs. Despite the method name, this is a union, not a relational join:
 * no key matching occurs and duplicate entries are retained.
 *
 * @param datasetOne the first {@link JavaPairRDD} to combine
 * @param datasetTwo the second {@link JavaPairRDD} to combine
 * @return a {@link JavaPairRDD} holding the elements of both datasets
 */
public static JavaPairRDD<Text, BytesWritable> joinDatasets(JavaPairRDD<Text, BytesWritable> datasetOne, JavaPairRDD<Text, BytesWritable> datasetTwo){
    return datasetOne.union(datasetTwo);
}
/**
 * Returns the union of this pair stream with {@code other}, backed by Spark's
 * RDD union (elements from both streams; duplicates retained).
 *
 * @param other the stream to combine with; must not be null
 * @return a new {@code SparkPairStream} over the combined elements
 */
@Override
public MPairStream<T, U> union(@NonNull MPairStream<? extends T, ? extends U> other) {
    return new SparkPairStream<>(rdd.union(toPairRDD(other)));
}
public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, String sourceName, Class<K> keyClass, Class<V> valueClass) { Set<String> inputNames = sourceInputs.get(sourceName); if (inputNames == null || inputNames.isEmpty()) { // should never happen if validation happened correctly at pipeline configure time throw new IllegalArgumentException( sourceName + " has no input. Please check that the source calls setInput at some input."); } JavaPairRDD<K, V> inputRDD = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD()); for (String inputName : inputNames) { inputRDD = inputRDD.union(createInputRDD(sec, jsc, inputName, keyClass, valueClass)); } return inputRDD; }
// Fragment of a larger rendering method. Appends the font layer (already stripped
// of its SVG header/footer) onto the background vector image, then sorts by key —
// presumably so SVG elements are emitted in the intended document order; confirm
// against the key scheme used when the layers were built.
this.distributedBackVectorImage = this.distributedBackVectorImage.union(distributedFontImageNoHeaderFooter);
this.distributedBackVectorImage = this.distributedBackVectorImage.sortByKey();
logger.info("[GeoSparkViz][JoinImage][Stop]");
// Fragment of a larger method (svgHeaderFooter, g2, sparkContext defined above).
// Key 2 holds the SVG footer; the header/footer pairs are parallelized and
// unioned into the vector image, then sorted by key — presumably so header,
// body, and footer come out in document order. The trailing `if` is cut off by
// this excerpt; NOTE(review): `== true` is redundant but left untouched here.
svgHeaderFooter.add(new Tuple2<Integer, String>(2, g2.getSVGFooter()));
JavaPairRDD<Integer, String> distributedSVGHeaderFooter = sparkContext.parallelizePairs(svgHeaderFooter);
this.distributedVectorImage = this.distributedVectorImage.union(distributedSVGHeaderFooter);
this.distributedVectorImage = this.distributedVectorImage.sortByKey();
if (this.parallelRenderImage == true) {
// Concatenates two keyed streams of (name, index, data) tuples into one RDD;
// the Integer component presumably tags which source each record came from — TODO confirm.
JavaPairRDD<String, Tuple3<String,Integer,PortableDataStream>> merged = first2.union(second2);
// Fragment: maps the second dataset to keyed tuples tagged with source index 1
// (the first dataset, mapped above this excerpt, presumably uses index 0),
// then concatenates both into a single RDD.
JavaPairRDD<String, Tuple3<String, Integer, PortableDataStream>> second2 = second.mapToPair(new PathToKeyFunction(1, converter2));
JavaPairRDD<String, Tuple3<String, Integer, PortableDataStream>> merged = first2.union(second2);
// Fragment: maps the second dataset to keyed tuples tagged with source index 1
// (the first dataset, mapped above this excerpt, presumably uses index 0),
// then concatenates both into a single RDD.
JavaPairRDD<String, Tuple3<String, Integer, PortableDataStream>> second2 = second.mapToPair(new PathToKeyFunction(1, converter2));
JavaPairRDD<String, Tuple3<String, Integer, PortableDataStream>> merged = first2.union(second2);
// NOTE(review): fragment of a much larger pipeline — each leading `})` closes a
// lambda defined above this excerpt, so the code cannot be restructured safely here.
// Each stage unions in extra term RDDs and collapses duplicate keys by field addition.
}).union(additionalA).union(zeroIndexedA).reduceByKey(FieldT::add);
}).union(zeroIndexedB).reduceByKey(FieldT::add);
}).union(zeroIndexedC).reduceByKey(FieldT::add);
config.endLog("Compute evaluation of polynomials A, B, and C, on set S.");
// Inverse coset FFT recovers H's coefficients; a single (domainSize, zero) pair is
// appended — presumably padding the coefficient list to full length; verify upstream.
coefficientsH = DistributedFFT
    .radix2CosetInverseFFT(coefficientsH, multiplicativeGenerator, rows, cols, fieldFactory)
    .union(config.sparkContext()
        .parallelizePairs(Collections.singletonList(new Tuple2<>(domainSize, zero))));
config.endLog("Compute coefficients of polynomial H.");
// Continuation of a builder/RDD chain whose start is above this excerpt:
// unions in the right-hand input, names the combined stage after the operator,
// then repartitions and sorts within partitions using the supplied partitioner/comparator.
.union(rightPair)
.setName(operator.getName() + "::union-inputs")
.repartitionAndSortWithinPartitions(partitioner, comparator)