org.apache.tinkerpop.gremlin.process.computer.MapReduce Java code examples

Refine search

/**
 * Creates the memory and registers every memory key the computation may use.
 *
 * <p>Each memory compute key declared by the vertex program is checked with
 * {@code MemoryHelper.validateKey} before it is registered. The memory key of each
 * map-reduce job is registered without validation.
 *
 * @param vertexProgram the program whose memory compute keys are registered; may be
 *                      {@code null}, in which case only map-reduce keys are registered
 * @param mapReducers   the map-reduce jobs whose memory keys are registered
 */
public FulgoraMemory(final VertexProgram<?> vertexProgram, final Set<MapReduce> mapReducers) {
  this.currentMap = new ConcurrentHashMap<>();
  this.previousMap = new ConcurrentHashMap<>();
  if (vertexProgram != null) {
    vertexProgram.getMemoryComputeKeys().forEach(computeKey -> {
      MemoryHelper.validateKey(computeKey);
      this.memoryKeys.add(computeKey);
    });
  }
  mapReducers.forEach(job -> this.memoryKeys.add(job.getMemoryKey()));
}

  /**
   * Orders the emitted map output by key when the job supplies a map-key comparator.
   *
   * <p>When no comparator is present nothing is changed. Otherwise, if this emitter feeds a
   * reduce stage, {@code reduceMap} is replaced by a {@link LinkedHashMap} whose entries are
   * inserted in sorted key order; if not, the contents of {@code mapQueue} are re-added in
   * sorted key order.
   *
   * @param mapReduce the job whose {@code getMapKeySort()} comparator, if any, defines the order
   */
  protected void complete(final MapReduce<K, V, ?, ?, ?> mapReduce) {
    if (!mapReduce.getMapKeySort().isPresent()) {
      return; // no ordering requested
    }
    final Comparator<K> keyOrder = mapReduce.getMapKeySort().get();
    if (this.doReduce) {
      // Sort the grouped entries, then rebuild the map so iteration follows the sorted order.
      final List<Map.Entry<K, Queue<V>>> entries = new ArrayList<>(this.reduceMap.entrySet());
      entries.sort(Comparator.comparing(Map.Entry::getKey, keyOrder));
      this.reduceMap = new LinkedHashMap<>();
      for (final Map.Entry<K, Queue<V>> entry : entries) {
        this.reduceMap.put(entry.getKey(), entry.getValue());
      }
    } else {
      // Map-only job: sort a snapshot of the queue and put it back in order.
      final List<KeyValue<K, V>> pairs = new ArrayList<>(this.mapQueue);
      pairs.sort(Comparator.comparing(KeyValue::getKey, keyOrder));
      this.mapQueue.clear();
      this.mapQueue.addAll(pairs);
    }
  }
}

  /**
   * Orders the emitted reduce output by key when the job supplies a reduce-key comparator.
   *
   * <p>When no comparator is present the queue is left untouched. Otherwise a snapshot of
   * {@code reduceQueue} is sorted by key and the queue is refilled in that order.
   *
   * @param mapReduce the job whose {@code getReduceKeySort()} comparator, if any, defines the order
   */
  protected void complete(final MapReduce<?, ?, OK, OV, ?> mapReduce) {
    if (!mapReduce.getReduceKeySort().isPresent()) {
      return; // no ordering requested
    }
    final Comparator<OK> keyOrder = mapReduce.getReduceKeySort().get();
    final List<KeyValue<OK, OV>> pairs = new ArrayList<>(this.reduceQueue);
    pairs.sort(Comparator.comparing(KeyValue::getKey, keyOrder));
    this.reduceQueue.clear();
    this.reduceQueue.addAll(pairs);
  }
}

this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
    if (mapReduce.doStage(MapReduce.Stage.MAP)) {
      FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
      mapJobs.put(mapReduce, mapEmitter);
    MapReduce mapReduce = mapJob.getKey();
    if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
      final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
      try (WorkerPool workers = new WorkerPool(numThreads)) {
        workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
        for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
          workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
        workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
      } catch (Exception e) {
        throw new TitanException("Exception while executing reduce phase", e);
      mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
    } else {
      mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());

  /**
   * Runs the reduce stage over the grouped output of the map (or combine) stage.
   *
   * <p>The input is grouped by key. For each partition the configuration is applied through
   * {@code KryoShimServiceLoader}, a {@code MapReduce} instance is re-created from the
   * configuration, and the partition is wrapped in a {@code ReduceIterator}. The
   * {@code mapReduce} argument itself is only consulted for its reduce-key comparator.
   *
   * @param mapOrCombineRDD            the key/value pairs produced by the previous stage
   * @param mapReduce                  the job; supplies the optional reduce-key comparator
   * @param graphComputerConfiguration the configuration used to rebuild the job in each partition
   * @return the reduced pairs, sorted by key into one partition if a reduce-key comparator is present
   */
  public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
      final JavaPairRDD<K, V> mapOrCombineRDD, final MapReduce<K, V, OK, OV, ?> mapReduce,
      final Configuration graphComputerConfiguration) {
    final JavaPairRDD<OK, OV> reducedRDD = mapOrCombineRDD.groupByKey().mapPartitionsToPair(groupedPairs -> {
      KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
      final MapReduce<K, V, OK, OV, ?> partitionJob =
          MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
      return new ReduceIterator<>(partitionJob, groupedPairs);
    });
    return mapReduce.getReduceKeySort().isPresent()
        ? reducedRDD.sortByKey(mapReduce.getReduceKeySort().get(), true, 1)
        : reducedRDD;
  }
}

/**
 * Runs the map stage over the vertices of the graph RDD.
 *
 * <p>For each partition the configuration is applied through {@code KryoShimServiceLoader},
 * a {@code MapReduce} instance is re-created from the configuration, and the partition is
 * wrapped in a {@code MapIterator}. The {@code mapReduce} argument itself is only consulted
 * for its map-key comparator.
 *
 * @param graphRDD                   the graph as id/vertex pairs
 * @param mapReduce                  the job; supplies the optional map-key comparator
 * @param graphComputerConfiguration the configuration used to rebuild the job in each partition
 * @return the mapped pairs, sorted by key into one partition if a map-key comparator is present
 */
public static <K, V> JavaPairRDD<K, V> executeMap(
    final JavaPairRDD<Object, VertexWritable> graphRDD, final MapReduce<K, V, ?, ?, ?> mapReduce,
    final Configuration graphComputerConfiguration) {
  final JavaPairRDD<K, V> mappedRDD = graphRDD.mapPartitionsToPair(vertices -> {
    KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
    final MapReduce<K, V, ?, ?, ?> partitionJob =
        MapReduce.<MapReduce<K, V, ?, ?, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
    return new MapIterator<>(partitionJob, vertices);
  });
  return mapReduce.getMapKeySort().isPresent()
      ? mappedRDD.sortByKey(mapReduce.getMapKeySort().get(), true, 1)
      : mappedRDD;
}

String inputLocation = Constants
    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
        fileSystemStorage).orElse(null);
if (null != inputLocation) {
  try {
    mapReduce.storeState(newApacheConfiguration);
    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE) ? SparkExecutor.executeCombine(mapRDD, newApacheConfiguration) : mapRDD;
    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE) ? SparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration) : combineRDD;
      mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));

  /**
   * Stores the configuration and loads the reduce-key comparator of the configured job.
   *
   * <p>The job is re-created from the configuration. Its {@code getReduceKeySort()} value is
   * unwrapped with {@code get()}, so this fails if the job declares no reduce-key comparator.
   *
   * @param configuration the Hadoop configuration describing the job
   */
  @Override
  public void setConf(final Configuration configuration) {
    super.setConf(configuration);
    final org.apache.commons.configuration.Configuration apacheConfiguration = ConfUtil.makeApacheConfiguration(configuration);
    final MapReduce<?, ?, ?, ?, ?> configuredJob =
        MapReduce.<MapReduce<?, ?, ?, ?, ?>>createMapReduce(HadoopGraph.open(apacheConfiguration), apacheConfiguration);
    this.comparator = configuredJob.getReduceKeySort().get();
  }
}

  /**
   * Stores the configuration and loads the map-key comparator of the configured job.
   *
   * <p>The job is re-created from the configuration. Its {@code getMapKeySort()} value is
   * unwrapped with {@code get()}, so this fails if the job declares no map-key comparator.
   *
   * @param configuration the Hadoop configuration describing the job
   */
  @Override
  public void setConf(final Configuration configuration) {
    super.setConf(configuration);
    final org.apache.commons.configuration.Configuration apacheConfiguration = ConfUtil.makeApacheConfiguration(configuration);
    final MapReduce<?, ?, ?, ?, ?> configuredJob =
        MapReduce.<MapReduce<?, ?, ?, ?, ?>>createMapReduce(HadoopGraph.open(apacheConfiguration), apacheConfiguration);
    this.comparator = configuredJob.getMapKeySort().get();
  }
}

mapReduce.storeState(apacheConfiguration);
ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, newConfiguration);
final Optional<Comparator<?>> mapSort = mapReduce.getMapKeySort();
final Optional<Comparator<?>> reduceSort = mapReduce.getReduceKeySort();
newConfiguration.setClass(Constants.GREMLIN_HADOOP_MAP_REDUCE_CLASS, mapReduce.getClass(), MapReduce.class);
final Job job = Job.getInstance(newConfiguration, mapReduce.toString());
HadoopGraph.LOGGER.info(Constants.GREMLIN_HADOOP_JOB_PREFIX + mapReduce.toString());
job.setJarByClass(HadoopGraph.class);
if (mapSort.isPresent())
  job.setSortComparatorClass(ObjectWritableComparator.ObjectWritableMapComparator.class);
job.setMapperClass(HadoopMap.class);
if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
  if (mapReduce.doStage(MapReduce.Stage.COMBINE))
    job.setCombinerClass(HadoopCombine.class);
  job.setReducerClass(HadoopReduce.class);
} else {
  if (mapSort.isPresent()) {
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(1); // todo: is this necessary to ensure sorted order?
Path memoryPath = new Path(Constants.getMemoryLocation(newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), (reduceSort.isPresent() ? mapReduce.getMemoryKey() + "-temp" : mapReduce.getMemoryKey())));
if (FileSystem.get(newConfiguration).exists(memoryPath)) {
  FileSystem.get(newConfiguration).delete(memoryPath, true);
  reduceSortJob.setNumReduceTasks(1); // todo: is this necessary to ensure sorted order?
  FileInputFormat.setInputPaths(reduceSortJob, memoryPath);
  final Path sortedMemoryPath = new Path(Constants.getMemoryLocation(newConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), mapReduce.getMemoryKey()));
  FileOutputFormat.setOutputPath(reduceSortJob, sortedMemoryPath);

/**
 * Returns the next emitted key/value pair, pulling more input through {@code processNext()}
 * until something has been queued.
 *
 * <p>Implemented as a loop rather than by recursion: an input element may emit nothing, and
 * recursing once per such element could exhaust the call stack on a long run of them.
 *
 * @return the next queued pair
 * @throws FastNoSuchElementException (the shared instance) when the queue is empty and the
 *         input is exhausted; {@code workerEnd(MapReduce.Stage.MAP)} is invoked first
 */
@Override
public Tuple2<K, V> next() {
  while (this.queue.isEmpty()) {
    if (!this.inputIterator.hasNext()) {
      this.mapReduce.workerEnd(MapReduce.Stage.MAP);
      throw FastNoSuchElementException.instance();
    }
    // May enqueue zero or more pairs; loop again to re-check the queue.
    this.processNext();
  }
  return this.queue.remove();
}

/**
 * Reports whether another emitted key/value pair is available, pulling more input through
 * {@code processNext()} until something has been queued or the input runs out.
 *
 * <p>Implemented as a loop rather than by recursion: an input element may emit nothing, and
 * recursing once per such element could exhaust the call stack on a long run of them.
 *
 * @return {@code true} if the queue holds a pair; {@code false} once the queue is empty and
 *         the input is exhausted, after invoking {@code workerEnd(MapReduce.Stage.MAP)}
 */
@Override
public boolean hasNext() {
  while (this.queue.isEmpty()) {
    if (!this.inputIterator.hasNext()) {
      this.mapReduce.workerEnd(MapReduce.Stage.MAP);
      return false;
    }
    // May enqueue zero or more pairs; loop again to re-check the queue.
    this.processNext();
  }
  return true;
}

/**
 * Prepares this mapper: converts the task's Hadoop configuration, applies it through
 * {@code KryoShimServiceLoader}, re-creates the {@code MapReduce} job from it and signals
 * the start of the MAP stage to that job.
 *
 * @param context the mapper context supplying the Hadoop configuration
 */
@Override
public void setup(final Mapper<NullWritable, VertexWritable, ObjectWritable, ObjectWritable>.Context context) {
  final Configuration jobConfiguration = ConfUtil.makeApacheConfiguration(context.getConfiguration());
  KryoShimServiceLoader.applyConfiguration(jobConfiguration);
  final HadoopGraph graph = HadoopGraph.open(jobConfiguration);
  this.mapReduce = MapReduce.createMapReduce(graph, jobConfiguration);
  this.mapReduce.workerStart(MapReduce.Stage.MAP);
}

/**
 * Signals the start of the MAP stage to every registered map-reduce job.
 *
 * @param graph   not used by this implementation
 * @param config  not used by this implementation
 * @param metrics not used by this implementation
 */
@Override
public void workerIterationStart(TitanGraph graph, Configuration config, ScanMetrics metrics) {
  for (final MapReduce job : mapJobs.keySet()) {
    job.workerStart(MapReduce.Stage.MAP);
  }
}

/**
 * Signals the end of the MAP stage to every registered map-reduce job.
 *
 * @param metrics not used by this implementation
 */
@Override
public void workerIterationEnd(ScanMetrics metrics) {
  for (final MapReduce job : mapJobs.keySet()) {
    job.workerEnd(MapReduce.Stage.MAP);
  }
}

/**
 * Runs the map step of every registered job on one vertex.
 *
 * <p>The vertex is first prepared: a {@code VertexMemoryHandler} is attached when vertex
 * memory is available, and the map-reduce access check is installed. Partitioned vertices
 * that are not the canonical representative are then skipped. A failure in one job is logged
 * and counted under {@code MAP_JOB_FAILURE}; it does not stop the remaining jobs.
 *
 * @param vertex  the vertex to map; must be a {@code PreloadedVertex}
 * @param metrics receives one {@code MAP_JOB_SUCCESS} or {@code MAP_JOB_FAILURE} increment per job
 */
@Override
public void process(TitanVertex vertex, ScanMetrics metrics) {
  final PreloadedVertex preloaded = (PreloadedVertex) vertex;
  if (null != vertexMemory) {
    preloaded.setPropertyMixing(new VertexMemoryHandler(vertexMemory, preloaded));
  }
  preloaded.setAccessCheck(MAPREDUCE_CHECK);

  final boolean nonCanonicalPartition =
      idManager.isPartitionedVertex(preloaded.longId()) && !idManager.isCanonicalVertexId(preloaded.longId());
  if (nonCanonicalPartition) {
    return; //Only consider the canonical partition vertex representative
  }

  for (final Map.Entry<MapReduce, FulgoraMapEmitter> jobEntry : mapJobs.entrySet()) {
    final MapReduce job = jobEntry.getKey();
    try {
      job.map(preloaded, jobEntry.getValue());
      metrics.incrementCustom(MAP_JOB_SUCCESS);
    } catch (Throwable failure) {
      // Best effort: record the failure and carry on with the next job.
      log.error("Encountered exception executing map job [" + job + "] on vertex [" + vertex + "]:", failure);
      metrics.incrementCustom(MAP_JOB_FAILURE);
    }
  }
}

/**
 * Generates the final result from the emitted key/value pairs and stores it in {@link Memory},
 * where it is accessible via {@link org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult}.
 * This default implementation passes the pairs to generateFinalResult() and sets the outcome
 * in the given memory under getMemoryKey().
 *
 * @param memory    the memory of the {@link GraphComputer}
 * @param keyValues the key/value pairs emitted from reduce() (or map() in a map only job).
 */
public default void addResultToMemory(final Memory.Admin memory, final Iterator<KeyValue<RK, RV>> keyValues) {
  final String memoryKey = this.getMemoryKey();
  memory.set(memoryKey, this.generateFinalResult(keyValues));
}

/**
 * Takes the next key and its grouped values from the input and runs the job's reduce()
 * on them, with results going to {@code reduceIteratorEmitter}.
 */
private void processNext() {
  final Tuple2<K, Iterable<V>> grouped = this.inputIterator.next();
  final K key = grouped._1();
  final Iterator<V> values = grouped._2().iterator();
  this.mapReduce.reduce(key, values, this.reduceIteratorEmitter);
}

/**
 * Delegates to the default {@code storeState} implementation of the {@code MapReduce}
 * interface; this override adds nothing of its own.
 *
 * @param configuration the configuration passed through to the default implementation
 */
@Override
public void storeState(final Configuration configuration) {
  MapReduce.super.storeState(configuration);
}

  /**
   * Runs the reduce stage over the grouped output of the map (or combine) stage.
   *
   * <p>The input is grouped by key. For each partition the configuration is applied through
   * {@code KryoShimServiceLoader}, a {@code MapReduce} instance is re-created from the
   * configuration, and the partition is wrapped in a {@code ReduceIterator}. The
   * {@code mapReduce} argument itself is only consulted for its reduce-key comparator.
   *
   * @param mapOrCombineRDD            the key/value pairs produced by the previous stage
   * @param mapReduce                  the job; supplies the optional reduce-key comparator
   * @param graphComputerConfiguration the configuration used to rebuild the job in each partition
   * @return the reduced pairs, sorted by key into one partition if a reduce-key comparator is present
   */
  public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
      final JavaPairRDD<K, V> mapOrCombineRDD, final MapReduce<K, V, OK, OV, ?> mapReduce,
      final Configuration graphComputerConfiguration) {
    final JavaPairRDD<OK, OV> reducedRDD = mapOrCombineRDD.groupByKey().mapPartitionsToPair(groupedPairs -> {
      KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
      final MapReduce<K, V, OK, OV, ?> partitionJob =
          MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
      return new ReduceIterator<>(partitionJob, groupedPairs);
    });
    return mapReduce.getReduceKeySort().isPresent()
        ? reducedRDD.sortByKey(mapReduce.getReduceKeySort().get(), true, 1)
        : reducedRDD;
  }
}

Javadoc

A MapReduce is composed of map(), combine(), and reduce() stages. The map() stage processes the vertices of the org.apache.tinkerpop.gremlin.structure.Graph in a logically parallel manner. The combine() stage aggregates the values of a particular map-emitted key prior to sending them across the cluster. The reduce() stage aggregates the values of the combine/map-emitted keys for the keys that hash to the current machine in the cluster. The interface presented here is nearly identical to the interface popularized by Hadoop, except that map() is applied over the vertices of the graph.

Most used methods

getMemoryKey
The results of the MapReduce job are associated with a memory-key to ultimately be stored in Memory.
getMapKeySort
If a Comparator is provided, then all pairs leaving the MapEmitter are sorted. The sorted results ar
getReduceKeySort
If a Comparator is provided, then all pairs leaving the ReduceEmitter are sorted. If sorting is not
addResultToMemory
The final result can be generated and added to Memory and accessible via org.apache.tinkerpop.gremli
doStage
A MapReduce job can be map-only, map-reduce-only, or map-combine-reduce. Before executing the partic
map
The map() method is logically executed at all vertices in the graph in parallel. The map() method em
reduce
The reduce() method is logically on the "machine" the respective key hashes to. The reduce() method
workerEnd
This method is called at the end of the respective MapReduce.Stage for a particular "chunk of vertic
workerStart
This method is called at the start of the respective MapReduce.Stage for a particular "chunk of vert
storeState
When it is necessary to store the state of a MapReduce job, this method is called. This is typically
clone
When multiple workers on a single machine need MapReduce instances, it is possible to use clone. Thi
createMapReduce
A helper method to construct a MapReduce given the content of the supplied configuration. The class

Popular in Java

Parsing JSON documents to java classes using gson
scheduleAtFixedRate (ScheduledExecutorService)
setRequestProperty (URLConnection)
scheduleAtFixedRate (Timer)
ObjectMapper (com.fasterxml.jackson.databind)
ObjectMapper provides functionality for reading and writing JSON, either to and from basic POJOs (Pl
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
URLConnection (java.net)
A connection to a URL for reading or writing. For HTTP connections, see HttpURLConnection for docume
Queue (java.util)
A collection designed for holding elements prior to processing. Besides basic java.util.Collection o
StringTokenizer (java.util)
Breaks a string into tokens; new code should probably use String#split.> // Legacy code: StringTo
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
From CI to AI: The AI layer in your organization

How to use MapReduce in org.apache.tinkerpop.gremlin.process.computer

Best Java code snippets using org.apache.tinkerpop.gremlin.process.computer.MapReduce (Showing top 20 results out of 315)

Refine search

How to use
MapReduce
in
org.apache.tinkerpop.gremlin.process.computer