/**
 * Builds the memory backing a computation and registers every memory key it will hold.
 * <p>
 * Keys come from two sources: the compute keys declared by the vertex program (each one is
 * passed through {@link MemoryHelper#validateKey} before being registered) and the memory
 * key of every supplied map-reduce job (registered as-is).
 *
 * @param vertexProgram the vertex program whose compute keys are registered; may be {@code null},
 *                      in which case only the map-reduce keys are registered
 * @param mapReducers   the map-reduce jobs whose memory keys are registered
 */
public FulgoraMemory(final VertexProgram<?> vertexProgram, final Set<MapReduce> mapReducers) {
    this.previousMap = new ConcurrentHashMap<>();
    this.currentMap = new ConcurrentHashMap<>();

    if (vertexProgram != null) {
        for (final String computeKey : vertexProgram.getMemoryComputeKeys()) {
            MemoryHelper.validateKey(computeKey);
            this.memoryKeys.add(computeKey);
        }
    }

    for (final MapReduce job : mapReducers) {
        this.memoryKeys.add(job.getMemoryKey());
    }
}
/**
 * Applies the job's map-key ordering (if it declares one) to the data collected by this emitter.
 * <p>
 * When no reduce stage follows ({@code doReduce} is false) the emitted key/value queue is
 * re-filled in sorted key order. Otherwise the grouped reduce map is rebuilt as a
 * {@link LinkedHashMap} whose insertion order is the sorted key order. When the job declares
 * no map-key sort, nothing is changed.
 *
 * @param mapReduce the job whose optional map-key comparator drives the ordering
 */
protected void complete(final MapReduce<K, V, ?, ?, ?> mapReduce) {
    if (!mapReduce.getMapKeySort().isPresent()) {
        return; // no ordering requested: leave the collected data untouched
    }
    final Comparator<K> keyOrder = mapReduce.getMapKeySort().get();

    if (this.doReduce) {
        final List<Map.Entry<K, Queue<V>>> groups = new ArrayList<>(this.reduceMap.entrySet());
        groups.sort((left, right) -> keyOrder.compare(left.getKey(), right.getKey()));
        // A LinkedHashMap keeps the sorted order for whoever iterates the map afterwards.
        this.reduceMap = new LinkedHashMap<>();
        for (final Map.Entry<K, Queue<V>> group : groups) {
            this.reduceMap.put(group.getKey(), group.getValue());
        }
    } else {
        final List<KeyValue<K, V>> pairs = new ArrayList<>(this.mapQueue);
        pairs.sort((left, right) -> keyOrder.compare(left.getKey(), right.getKey()));
        this.mapQueue.clear();
        this.mapQueue.addAll(pairs);
    }
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Applies the job's reduce-key ordering (if it declares one) to the emitted reduce results.
 * <p>
 * The queue is copied, sorted by key with the job's comparator, and re-filled in that order.
 * When the job declares no reduce-key sort, the queue is left as it is.
 *
 * @param mapReduce the job whose optional reduce-key comparator drives the ordering
 */
protected void complete(final MapReduce<?, ?, OK, OV, ?> mapReduce) {
    if (!mapReduce.getReduceKeySort().isPresent()) {
        return; // no ordering requested
    }
    final Comparator<OK> keyOrder = mapReduce.getReduceKeySort().get();

    final List<KeyValue<OK, OV>> results = new ArrayList<>(this.reduceQueue);
    results.sort((left, right) -> keyOrder.compare(left.getKey(), right.getKey()));
    this.reduceQueue.clear();
    this.reduceQueue.addAll(results);
}
} // closes the enclosing type, whose header lies outside this excerpt
this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode)); this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode)); if (mapReduce.doStage(MapReduce.Stage.MAP)) { FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE)); mapJobs.put(mapReduce, mapEmitter); MapReduce mapReduce = mapJob.getKey(); if (mapReduce.doStage(MapReduce.Stage.REDUCE)) { final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>(); try (WorkerPool workers = new WorkerPool(numThreads)) { workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE)); for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) { workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter)); workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE)); } catch (Exception e) { throw new TitanException("Exception while executing reduce phase", e); mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator()); } else { mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
/**
 * Runs the reduce stage of a map-reduce job over the output of its map (or combine) stage.
 * <p>
 * The input is grouped by key and each partition is fed through a {@link ReduceIterator}.
 * Inside every partition the Kryo shim configuration is applied and a fresh job instance is
 * created from {@code graphComputerConfiguration}; the {@code mapReduce} argument itself is
 * only consulted for its optional reduce-key sort. When such a sort is present the result is
 * sorted ascending by key into a single partition.
 *
 * @param mapOrCombineRDD            the key/value pairs produced by the map or combine stage
 * @param mapReduce                  the job being executed; supplies the optional reduce-key comparator
 * @param graphComputerConfiguration the configuration used to open the graph and re-create the job per partition
 * @return the reduced key/value pairs, sorted by key if the job requests it
 */
public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
        final JavaPairRDD<K, V> mapOrCombineRDD,
        final MapReduce<K, V, OK, OV, ?> mapReduce,
        final Configuration graphComputerConfiguration) {
    final JavaPairRDD<OK, OV> reducedRDD = mapOrCombineRDD.groupByKey().mapPartitionsToPair(groupedPartition -> {
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> partitionJob = MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(
                HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new ReduceIterator<>(partitionJob, groupedPartition);
    });

    if (!mapReduce.getReduceKeySort().isPresent()) {
        return reducedRDD;
    }
    // ascending order, collapsed into one partition
    return reducedRDD.sortByKey(mapReduce.getReduceKeySort().get(), true, 1);
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Runs the map stage of a map-reduce job over the graph RDD.
 * <p>
 * Each partition is fed through a {@link MapIterator}. Inside every partition the Kryo shim
 * configuration is applied and a fresh job instance is created from
 * {@code graphComputerConfiguration}; the {@code mapReduce} argument itself is only consulted
 * for its optional map-key sort. When such a sort is present the result is sorted ascending
 * by key into a single partition.
 *
 * @param graphRDD                   the graph as an RDD of vertex writables
 * @param mapReduce                  the job being executed; supplies the optional map-key comparator
 * @param graphComputerConfiguration the configuration used to open the graph and re-create the job per partition
 * @return the mapped key/value pairs, sorted by key if the job requests it
 */
public static <K, V> JavaPairRDD<K, V> executeMap(
        final JavaPairRDD<Object, VertexWritable> graphRDD,
        final MapReduce<K, V, ?, ?, ?> mapReduce,
        final Configuration graphComputerConfiguration) {
    final JavaPairRDD<K, V> mappedRDD = graphRDD.mapPartitionsToPair(vertexPartition -> {
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, ?, ?, ?> partitionJob = MapReduce.<MapReduce<K, V, ?, ?, ?>>createMapReduce(
                HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new MapIterator<>(partitionJob, vertexPartition);
    });

    if (!mapReduce.getMapKeySort().isPresent()) {
        return mappedRDD;
    }
    // ascending order, collapsed into one partition
    return mappedRDD.sortByKey(mapReduce.getMapKeySort().get(), true, 1);
}
String inputLocation = Constants .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION), fileSystemStorage).orElse(null); if (null != inputLocation) { try { mapReduce.storeState(newApacheConfiguration); final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE) ? SparkExecutor.executeCombine(mapRDD, newApacheConfiguration) : mapRDD; final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE) ? SparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration) : combineRDD; mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
/**
 * Stores the Hadoop configuration and derives this comparator's delegate from it.
 * <p>
 * The Hadoop configuration is converted to an Apache configuration, a job instance is
 * re-created from it, and that job's reduce-key comparator becomes the delegate.
 * The comparator is fetched with an unchecked {@code get()}, so a job without a
 * reduce-key sort makes this method throw.
 *
 * @param configuration the Hadoop configuration describing the graph and the job
 */
@Override
public void setConf(final Configuration configuration) {
    super.setConf(configuration);
    final org.apache.commons.configuration.Configuration jobConfiguration =
            ConfUtil.makeApacheConfiguration(configuration);
    final MapReduce<?, ?, ?, ?, ?> job = MapReduce.<MapReduce<?, ?, ?, ?, ?>>createMapReduce(
            HadoopGraph.open(jobConfiguration), jobConfiguration);
    this.comparator = job.getReduceKeySort().get();
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Stores the Hadoop configuration and derives this comparator's delegate from it.
 * <p>
 * The Hadoop configuration is converted to an Apache configuration, a job instance is
 * re-created from it, and that job's map-key comparator becomes the delegate.
 * The comparator is fetched with an unchecked {@code get()}, so a job without a
 * map-key sort makes this method throw.
 *
 * @param configuration the Hadoop configuration describing the graph and the job
 */
@Override
public void setConf(final Configuration configuration) {
    super.setConf(configuration);
    final org.apache.commons.configuration.Configuration jobConfiguration =
            ConfUtil.makeApacheConfiguration(configuration);
    final MapReduce<?, ?, ?, ?, ?> job = MapReduce.<MapReduce<?, ?, ?, ?, ?>>createMapReduce(
            HadoopGraph.open(jobConfiguration), jobConfiguration);
    this.comparator = job.getMapKeySort().get();
}
} // closes the enclosing type, whose header lies outside this excerpt
mapReduce.storeState(apacheConfiguration); ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, newConfiguration); final Optional<Comparator<?>> mapSort = mapReduce.getMapKeySort(); final Optional<Comparator<?>> reduceSort = mapReduce.getReduceKeySort(); newConfiguration.setClass(Constants.GREMLIN_HADOOP_MAP_REDUCE_CLASS, mapReduce.getClass(), MapReduce.class); final Job job = Job.getInstance(newConfiguration, mapReduce.toString()); HadoopGraph.LOGGER.info(Constants.GREMLIN_HADOOP_JOB_PREFIX + mapReduce.toString()); job.setJarByClass(HadoopGraph.class); if (mapSort.isPresent()) job.setSortComparatorClass(ObjectWritableComparator.ObjectWritableMapComparator.class); job.setMapperClass(HadoopMap.class); if (mapReduce.doStage(MapReduce.Stage.REDUCE)) { if (mapReduce.doStage(MapReduce.Stage.COMBINE)) job.setCombinerClass(HadoopCombine.class); job.setReducerClass(HadoopReduce.class); } else { if (mapSort.isPresent()) { job.setReducerClass(Reducer.class); job.setNumReduceTasks(1); // todo: is this necessary to ensure sorted order? Path memoryPath = new Path(Constants.getMemoryLocation(newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), (reduceSort.isPresent() ? mapReduce.getMemoryKey() + "-temp" : mapReduce.getMemoryKey()))); if (FileSystem.get(newConfiguration).exists(memoryPath)) { FileSystem.get(newConfiguration).delete(memoryPath, true); reduceSortJob.setNumReduceTasks(1); // todo: is this necessary to ensure sorted order? FileInputFormat.setInputPaths(reduceSortJob, memoryPath); final Path sortedMemoryPath = new Path(Constants.getMemoryLocation(newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), mapReduce.getMemoryKey())); FileOutputFormat.setOutputPath(reduceSortJob, sortedMemoryPath);
/**
 * Prepares this mapper before any vertex is processed.
 * <p>
 * Converts the task's Hadoop configuration to an Apache configuration, applies it to the
 * Kryo shim service loader, re-creates the job from it, and signals the start of the
 * MAP stage to that job.
 *
 * @param context the Hadoop mapper context supplying the task configuration
 */
@Override
public void setup(final Mapper<NullWritable, VertexWritable, ObjectWritable, ObjectWritable>.Context context) {
    final Configuration jobConfiguration = ConfUtil.makeApacheConfiguration(context.getConfiguration());
    KryoShimServiceLoader.applyConfiguration(jobConfiguration);

    final HadoopGraph graph = HadoopGraph.open(jobConfiguration);
    this.mapReduce = MapReduce.createMapReduce(graph, jobConfiguration);
    this.mapReduce.workerStart(MapReduce.Stage.MAP);
}
/**
 * Signals the start of the MAP stage to every registered map-reduce job.
 *
 * @param graph   unused here
 * @param config  unused here
 * @param metrics unused here
 */
@Override
public void workerIterationStart(TitanGraph graph, Configuration config, ScanMetrics metrics) {
    // Only the jobs themselves are needed, not their emitters.
    for (MapReduce job : mapJobs.keySet()) {
        job.workerStart(MapReduce.Stage.MAP);
    }
}
/**
 * Signals the end of the MAP stage to every registered map-reduce job.
 *
 * @param metrics unused here
 */
@Override
public void workerIterationEnd(ScanMetrics metrics) {
    // Only the jobs themselves are needed, not their emitters.
    for (MapReduce job : mapJobs.keySet()) {
        job.workerEnd(MapReduce.Stage.MAP);
    }
}
@Override public void process(TitanVertex vertex, ScanMetrics metrics) { PreloadedVertex v = (PreloadedVertex) vertex; if (vertexMemory != null) { VertexMemoryHandler vh = new VertexMemoryHandler(vertexMemory, v); v.setPropertyMixing(vh); } v.setAccessCheck(MAPREDUCE_CHECK); if (idManager.isPartitionedVertex(v.longId()) && !idManager.isCanonicalVertexId(v.longId())) { return; //Only consider the canonical partition vertex representative } else { for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) { MapReduce job = mapJob.getKey(); try { job.map(v, mapJob.getValue()); metrics.incrementCustom(MAP_JOB_SUCCESS); } catch (Throwable ex) { log.error("Encountered exception executing map job [" + job + "] on vertex [" + vertex + "]:", ex); metrics.incrementCustom(MAP_JOB_FAILURE); } } } }
/**
 * Publishes the job's final result into {@link Memory}, from where it is accessible via
 * {@link org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult}.
 * This default implementation stores whatever {@code generateFinalResult()} produces from the
 * given key/value pairs under the key returned by {@code getMemoryKey()}.
 *
 * @param memory    the memory of the {@link GraphComputer}
 * @param keyValues the key/value pairs emitted from reduce() (or map() in a map only job).
 */
public default void addResultToMemory(final Memory.Admin memory, final Iterator<KeyValue<RK, RV>> keyValues) {
    final String memoryKey = this.getMemoryKey();
    memory.set(memoryKey, this.generateFinalResult(keyValues));
}
/**
 * Pulls the next key group from the input and runs the job's reduce step on it,
 * with results going to this iterator's reduce emitter.
 */
private void processNext() {
    final Tuple2<K, Iterable<V>> group = this.inputIterator.next();
    final K key = group._1();
    final Iterable<V> values = group._2();
    this.mapReduce.reduce(key, values.iterator(), this.reduceIteratorEmitter);
}
/**
 * Stores this job's state into the given configuration by delegating to the
 * default implementation declared on {@link MapReduce}; nothing extra is added here.
 *
 * @param configuration the configuration that receives the job state
 */
@Override
public void storeState(final Configuration configuration) {
    MapReduce.super.storeState(configuration);
}
/**
 * Executes a job's reduce stage on the key/value pairs coming out of its map or combine stage.
 * <p>
 * Pairs are grouped by key and every partition is processed by a {@link ReduceIterator}.
 * Per partition, the Kryo shim configuration is applied and the job is re-created from
 * {@code graphComputerConfiguration}. The {@code mapReduce} argument is used only to look up
 * the optional reduce-key sort; if one is present, the output is sorted ascending by key
 * into a single partition.
 *
 * @param mapOrCombineRDD            output of the map or combine stage
 * @param mapReduce                  the job being executed; supplies the optional reduce-key comparator
 * @param graphComputerConfiguration the configuration used to open the graph and re-create the job per partition
 * @return the reduced key/value pairs, sorted by key if the job requests it
 */
public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
        final JavaPairRDD<K, V> mapOrCombineRDD,
        final MapReduce<K, V, OK, OV, ?> mapReduce,
        final Configuration graphComputerConfiguration) {
    final JavaPairRDD<OK, OV> unsortedRDD = mapOrCombineRDD.groupByKey().mapPartitionsToPair(keyGroups -> {
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> workerJob = MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(
                HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new ReduceIterator<>(workerJob, keyGroups);
    });

    // Sort ascending into one partition only when the job asks for a reduce-key order.
    return mapReduce.getReduceKeySort().isPresent()
            ? unsortedRDD.sortByKey(mapReduce.getReduceKeySort().get(), true, 1)
            : unsortedRDD;
}
} // closes the enclosing type, whose header lies outside this excerpt