JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
    new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));
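For context, mapPartitionsToPair takes a PairFlatMapFunction over a whole partition's iterator and yields a JavaPairRDD. A minimal sketch of the same shape, assuming Spark 2.x signatures; LineLengthFunction and its constructor argument are hypothetical stand-ins, not Kylin's FlatOutputFucntion:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.function.PairFlatMapFunction;

import scala.Tuple2;

// Hypothetical stateful pair-emitting function: configured once on the
// driver, then applied to each partition's iterator on the executors.
class LineLengthFunction
    implements PairFlatMapFunction<Iterator<String>, String, Integer> {

  private final String tag; // example of constructor-injected state

  LineLengthFunction(String tag) {
    this.tag = tag;
  }

  @Override
  public Iterator<Tuple2<String, Integer>> call(Iterator<String> partition) {
    List<Tuple2<String, Integer>> out = new ArrayList<>();
    while (partition.hasNext()) {
      String line = partition.next();
      out.add(new Tuple2<>(tag + ":" + line, line.length()));
    }
    return out.iterator(); // Spark 2.x expects an Iterator, not an Iterable
  }
}

// Usage: JavaPairRDD<String, Integer> pairs =
//     lines.mapPartitionsToPair(new LineLengthFunction("demo"));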
System.out.println("Saved country contact counts as a file"); JavaPairRDD<String, CallLog[]> contactsContactLists = validCallSigns.mapPartitionsToPair( new PairFlatMapFunction<Iterator<String>, String, CallLog[]>() { public Iterable<Tuple2<String, CallLog[]>> call(Iterator<String> input) {
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaPairRDD[] rdds = new JavaPairRDD[parents.size()];
  for (int i = 0; i < rdds.length; i++) {
    if (parents.get(i) instanceof PTableBase) {
      rdds[i] = (JavaPairRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
    } else {
      JavaRDD rdd = (JavaRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
      rdds[i] = rdd.mapPartitionsToPair(
          new PairFlatMapDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
    }
  }
  return runtime.getSparkContext().union(rdds);
}
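The Crunch method above normalizes every parent to a JavaPairRDD before the union: tables already are pair RDDs, while plain collections are lifted through an identity mapPartitionsToPair. A standalone sketch of that normalize-then-union idea, assuming Spark 2.x; the class and method names below are illustrative, not Crunch's:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;

import scala.Tuple2;

class UnionHelper {
  // Lift an unkeyed RDD into pair form so it can be unioned with RDDs
  // that are already JavaPairRDDs, mirroring the identity wrapping above.
  static JavaPairRDD<String, String> asPairs(JavaRDD<String> rdd) {
    return rdd.mapPartitionsToPair((Iterator<String> it) -> {
      List<Tuple2<String, String>> out = new ArrayList<>();
      while (it.hasNext()) {
        String s = it.next();
        out.add(new Tuple2<>(s, s)); // identity pairing: element as key and value
      }
      return out.iterator();
    });
  }

  static JavaPairRDD<String, String> unionAll(JavaRDD<String> plain,
                                              JavaPairRDD<String, String> keyed) {
    return asPairs(plain).union(keyed); // pairwise union keeps the pair type
  }
}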
processedTuplesRdd.mapPartitionsToPair(new TuplesToTuplesFunction<>(), preservePartitions);

final JavaRDD<InputRow> coalescedInputRowsRDD = inputRowsRDD.coalesce(1);
namedAnalyzerResultsRDD = coalescedInputRowsRDD.mapPartitionsToPair(
    new RowProcessingFunction(_sparkJobContext));
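The coalesce(1) before the second mapPartitionsToPair above collapses the data to a single partition, so one function instance sees every row. A minimal sketch of why that combination is useful, assuming Spark 2.x; GlobalCount and inputLines are hypothetical names:

import java.util.Collections;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;

import scala.Tuple2;

class GlobalCount {
  static JavaPairRDD<String, Integer> countAll(JavaRDD<String> inputLines) {
    // coalesce(1) forces a single partition, so the pair function's call()
    // runs exactly once and can compute a global aggregate, much as the
    // single RowProcessingFunction instance above sees every InputRow.
    return inputLines.coalesce(1).mapPartitionsToPair(it -> {
      int rows = 0;
      while (it.hasNext()) {
        it.next();
        rows++;
      }
      return Collections.singletonList(new Tuple2<>("totalRows", rows)).iterator();
    });
  }
}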
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  if (combineFn instanceof CombineFn && getOnlyParent() instanceof PGroupedTableImpl) {
    runtime.setCombineFn((CombineFn) combineFn);
  }
  JavaRDDLike<?, ?> parentRDD = ((SparkCollection) getOnlyParent()).getJavaRDDLike(runtime);
  fn.configure(runtime.getConfiguration());
  return parentRDD
      .mapPartitionsWithIndex(
          new FlatMapIndexFn(fn, parentRDD instanceof JavaPairRDD, runtime.getRuntimeContext()),
          false)
      .mapPartitionsToPair(new CrunchPairTuple2());
}
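The trailing CrunchPairTuple2 step exists because mapPartitionsWithIndex can only return a JavaRDD, here one whose elements happen to be Tuple2s; an identity mapPartitionsToPair restores the JavaPairRDD type. A minimal sketch of that two-step pattern, assuming Spark 2.x; IndexThenPair is an illustrative name:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;

import scala.Tuple2;

class IndexThenPair {
  static JavaPairRDD<Integer, String> byPartition(JavaRDD<String> lines) {
    // Step 1: tag each element with its partition number; the result is a
    // plain JavaRDD of Tuple2 elements, not yet a JavaPairRDD.
    JavaRDD<Tuple2<Integer, String>> indexed = lines.mapPartitionsWithIndex(
        (Integer idx, Iterator<String> it) -> {
          List<Tuple2<Integer, String>> out = new ArrayList<>();
          while (it.hasNext()) {
            out.add(new Tuple2<>(idx, it.next()));
          }
          return out.iterator();
        },
        false);
    // Step 2: an identity mapPartitionsToPair recovers the pair type,
    // which is all the CrunchPairTuple2 step in the method above does.
    return indexed.mapPartitionsToPair(it -> it);
  }
}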
@GET @Path("/plugin") public void plugin(HttpServiceRequest request, HttpServiceResponder responder, @QueryParam("pluginType") String pluginType, @QueryParam("pluginName") String pluginName, @QueryParam("file") String file) { try (SparkHttpServicePluginContext pluginContext = getContext().getPluginContext()) { Class<?> cls = pluginContext.usePluginClass(pluginType, pluginName, "pluginId", PluginProperties.builder().build()); if (cls == null) { responder.sendError(404, "Plugin of type " + pluginType + " and name " + pluginName + " not found."); return; } JavaSparkContext jsc = getContext().getJavaSparkContext(); Map<String, Integer> counts = jsc.textFile(file) .mapPartitionsToPair(iterator -> { ToIntFunction<String> func = pluginContext.newPluginInstance("pluginId"); List<Tuple2<String, Integer>> result = new ArrayList<>(); while (iterator.hasNext()) { String element = iterator.next(); result.add(new Tuple2<>(element, func.applyAsInt(element))); } return result; }) .reduceByKey((v1, v2) -> v1 + v2) .collectAsMap(); responder.sendJson(200, counts, new TypeToken<Map<String, Integer>>() { }.getType(), new Gson()); } }
.mapPartitionsToPair(iterator -> {
  ToIntFunction<String> func = pluginContext.newPluginInstance("pluggable",
      new UDTNameMacroEvaluator(udtName));
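The fragment above is cut off, but it illustrates the usual reason to prefer mapPartitionsToPair over mapToPair: expensive, possibly non-serializable setup (here a plugin instance) happens once per partition rather than once per record. A generic sketch of that pattern under that assumption; every name below is a hypothetical stand-in, not a CDAP API:

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;

import scala.Tuple2;

// Hypothetical expensive resource; imagine a parser, DB client, or plugin.
class Scorer {
  static Scorer create() { return new Scorer(); }      // costly in real life
  int score(String s) { return s.hashCode() & 0x7f; }  // placeholder logic
}

class PerPartitionSetup {
  static JavaPairRDD<String, Integer> score(JavaRDD<String> lines) {
    return lines.mapPartitionsToPair(it -> {
      Scorer scorer = Scorer.create(); // built once per partition, not per line
      List<Tuple2<String, Integer>> out = new ArrayList<>();
      while (it.hasNext()) {
        String line = it.next();
        out.add(new Tuple2<>(line, scorer.score(line)));
      }
      return out.iterator();
    });
  }
}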
        WindowingHelpers.unwindowFunction()), true)
    .mapPartitionsToPair(TranslationUtils.toPairFlatMapFunction(), true)
    .mapValues(
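The true passed as the second argument in the Beam runner fragment above is the preservesPartitioning flag: it promises Spark that the function leaves keys unchanged, so an existing partitioner can be retained and a later shuffle on the same keys avoided. A minimal sketch of that overload, assuming Spark 2.x; the class and variable names are illustrative:

import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;

class PreservePartitioningDemo {
  static JavaPairRDD<String, Integer> identityKeepPartitioner(
      JavaPairRDD<String, Integer> pairs) {
    JavaPairRDD<String, Integer> partitioned = pairs.partitionBy(new HashPartitioner(4));
    // preservesPartitioning = true keeps the HashPartitioner attached, so a
    // following reduceByKey or join on the same keys can skip its shuffle.
    return partitioned.mapPartitionsToPair(it -> it, true);
  }
}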