/** * Get the training data - a JavaRDD<DataSet> * Note that this approach for getting training data is a special case for this example (modelling characters), and * should not be taken as best practice for loading data (like CSV etc) in general. */ public static JavaRDD<DataSet> getTrainingData(JavaSparkContext sc) throws IOException { //Get data. For the sake of this example, we are doing the following operations: // File -> String -> List<String> (split into length "sequenceLength" characters) -> JavaRDD<String> -> JavaRDD<DataSet> List<String> list = getShakespeareAsList(exampleLength); JavaRDD<String> rawStrings = sc.parallelize(list); Broadcast<Map<Character, Integer>> bcCharToInt = sc.broadcast(CHAR_TO_INT); return rawStrings.map(new StringToDataSetFn(bcCharToInt)); }
// Fragment: the leading "userIDIndexMap.size(), itemIDIndexMap.size());" closes a call whose
// start is outside this view. The statements that follow broadcast four lookup tables:
// userIDIndexMap and itemIDIndexMap as-is (String -> Integer), plus the result of
// invertMap(...) applied to each of them (Integer -> String).
userIDIndexMap.size(), itemIDIndexMap.size()); Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDIndexMap); Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDIndexMap); Broadcast<Map<Integer,String>> bUserIndexToID = sparkContext.broadcast(invertMap(userIDIndexMap)); Broadcast<Map<Integer,String>> bItemIndexToID = sparkContext.broadcast(invertMap(itemIDIndexMap));
// Broadcasts userIdLookupMap_wrapped (String -> Integer) as broadcastVar, then begins a map(...)
// over filtered_by_client using an anonymous Function from Tuple2<String, ActionData> to String.
// The body of that anonymous class is outside this view.
final Broadcast<Map<String, Integer>> broadcastVar = jsc.broadcast(userIdLookupMap_wrapped); JavaRDD<String> json_only_with_zeros = filtered_by_client.map(new Function<Tuple2<String, ActionData>, String>() {
// Broadcasts the String[] returned by loadCallSignTable() as signPrefixes, then begins a
// mapToPair(...) over contactCounts using an anonymous PairFunction that takes
// Tuple2<String, Integer> and yields (String, Integer) pairs.
// The body of that anonymous class is outside this view.
final Broadcast<String[]> signPrefixes = sc.broadcast(loadCallSignTable()); JavaPairRDD<String, Integer> countryContactCounts = contactCounts.mapToPair( new PairFunction<Tuple2<String, Integer>, String, Integer> (){
// Takes the distinct values of positiveUserProducts, collects them into a List<Integer>,
// and broadcasts that list (presumably the set of all item IDs, per the variable name).
Broadcast<List<Integer>> allItemIDsBC = sparkContext.broadcast(positiveUserProducts.values().distinct().collect());
protected void broadcastMemory(final JavaSparkContext sparkContext) { this.broadcast.destroy(true); // do we need to block? final Map<String, Object> toBroadcast = new HashMap<>(); this.sparkMemory.forEach((key, object) -> { if (!object.value().isEmpty() && this.memoryComputeKeys.get(key).isBroadcast()) toBroadcast.put(key, object.value()); }); this.broadcast = sparkContext.broadcast(toBroadcast); }
// Wraps N in a broadcast variable (presumably the "top N" limit read inside Spark tasks).
final Broadcast<Integer> topN = ctx.broadcast(N);
/**
 * Sets up the memory: registers the compute keys, creates one accumulator per registered key,
 * and initializes the broadcast with an empty map.
 *
 * @param vertexProgram program whose memory compute keys are registered; may be {@code null},
 *                      in which case no keys are taken from it
 * @param mapReducers   map-reduce jobs; each one registers a key created with
 *                      {@code Operator.assign, false, false} under its memory key
 * @param sparkContext  context used to create the accumulators and the initial broadcast
 */
public SparkMemory(final VertexProgram<?> vertexProgram, final Set<MapReduce> mapReducers, final JavaSparkContext sparkContext) {
    // Keys declared by the vertex program, if one was supplied.
    if (vertexProgram != null) {
        for (final MemoryComputeKey declaredKey : vertexProgram.getMemoryComputeKeys()) {
            this.memoryComputeKeys.put(declaredKey.getKey(), declaredKey);
        }
    }

    // One key per map-reduce job, registered after the vertex program's keys.
    for (final MapReduce job : mapReducers) {
        this.memoryComputeKeys.put(
                job.getMemoryKey(),
                MemoryComputeKey.of(job.getMemoryKey(), Operator.assign, false, false));
    }

    // Every registered key gets an accumulator named after it, starting from an empty ObjectWritable.
    for (final MemoryComputeKey registeredKey : this.memoryComputeKeys.values()) {
        this.sparkMemory.put(
                registeredKey.getKey(),
                sparkContext.accumulator(
                        ObjectWritable.empty(),
                        registeredKey.getKey(),
                        new MemoryAccumulator<>(registeredKey)));
    }

    // Start with an empty broadcast.
    this.broadcast = sparkContext.broadcast(Collections.emptyMap());
}
// Wraps N in a broadcast variable (presumably the "top N" limit read inside Spark tasks).
final Broadcast<Integer> topN = ctx.broadcast(N);
// Broadcasts the timetable map (String -> Double) as broadcastTimeTable.
final Broadcast<Map<String, Double>> broadcastTimeTable = ctx.broadcast(timetable);
// Broadcasts the timetable map (String -> Double) as broadcastTimeTable.
final Broadcast<Map<String, Double>> broadcastTimeTable = ctx.broadcast(timetable);
// Reads the third command-line argument as the output location, then broadcasts neighborWindow.
// NOTE(review): "brodcastWindow" misspells "broadcast"; renaming it requires updating usages
// that are outside this view.
String output = args[2]; final Broadcast<Integer> brodcastWindow = sc.broadcast(neighborWindow);
// Reads the third command-line argument as the output location, then broadcasts neighborWindow.
// NOTE(review): "brodcastWindow" misspells "broadcast"; renaming it requires updating usages
// that are outside this view.
String output = args[2]; final Broadcast<Integer> brodcastWindow = sc.broadcast(neighborWindow);