@Test
public void binaryFilesCaching() throws Exception {
  // Reusing the wholeTextFiles example
  byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8);
  String tempDirName = tempDir.getAbsolutePath();
  File file1 = new File(tempDirName + "/part-00000");
  FileOutputStream fos1 = new FileOutputStream(file1);
  FileChannel channel1 = fos1.getChannel();
  ByteBuffer bbuf = ByteBuffer.wrap(content1);
  channel1.write(bbuf);
  channel1.close();

  JavaPairRDD<String, PortableDataStream> readRDD = sc.binaryFiles(tempDirName).cache();
  readRDD.foreach(pair -> pair._2().toArray()); // force the file to be read
  List<Tuple2<String, PortableDataStream>> result = readRDD.collect();
  for (Tuple2<String, PortableDataStream> res : result) {
    assertArrayEquals(content1, res._2().toArray());
  }
}
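PortableDataStream defers reading until toArray() (or open()) is called, so the foreach above is what actually pulls each file through once before collect() returns entries from the cached RDD. A minimal standalone sketch of the same read-then-reuse pattern, assuming a local context; the input path is hypothetical:

JavaSparkContext jsc = new JavaSparkContext("local[2]", "binaryFilesSketch");
JavaPairRDD<String, PortableDataStream> files =
    jsc.binaryFiles("/tmp/binary-input").cache(); // hypothetical directory
files.foreach(pair -> pair._2().toArray());       // read every stream once
long fileCount = files.count();                   // reuses the cached pairs
jsc.stop();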
@Test
public void map() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}
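Each collect() here is the action that actually computes and stores the cached RDD. A hedged sketch showing that effect directly, counting map invocations with a LongAccumulator (assumes local mode, the cached data fitting in memory, and an org.apache.spark.util.LongAccumulator import):

JavaRDD<Integer> nums = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
LongAccumulator calls = sc.sc().longAccumulator("mapCalls");
JavaRDD<Integer> squares = nums.map(x -> { calls.add(1); return x * x; }).cache();
squares.collect(); // first action computes and fills the cache: calls.value() == 5
squares.collect(); // second action reads the cache: calls.value() is still 5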
/**
 * Cache the result RDD, since K-Means is an iterative machine learning
 * algorithm and the result will be used many times.
 *
 * @param wikiData path to the featurized data
 * @param context a Java Spark context object
 * @return JavaPairRDD<String, Vector>, where K is <project_code> + " " + <page_title>
 *         and V is a Vector of features
 */
static JavaPairRDD<String, Vector> getFeatureizedData(String wikiData, JavaSparkContext context) {
  JavaPairRDD<String, Vector> data = context.textFile(wikiData).mapToPair(
      new PairFunction<String, String, Vector>() {
        @Override
        public Tuple2<String, Vector> call(String in) throws Exception {
          // in: <key><#><feature_1><,><feature_2><,>...<,><feature_24>
          String[] parts = StringUtils.split(in, "#");
          return new Tuple2<String, Vector>(parts[0], Util.buildVector(parts[1], ","));
        }
      }).cache();
  return data;
}
/**
 * Cache the underlying RDD.
 *
 * @return the cached {@link StructureDataRDD}
 */
public StructureDataRDD cache() {
  javaPairRdd.cache();
  return this;
}
/**
 * Cache the data. Useful when the user wants to run multiple analyses over the
 * same data.
 *
 * @return the cached data object.
 */
public SegmentDataRDD cache() {
  return new SegmentDataRDD(segmentRDD.cache());
}
@Override
public MPairStream<T, U> cache() {
  return new SparkPairStream<>(rdd.cache());
}
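The three wrapper snippets above share one pattern: cache() delegates to the wrapped RDD and hands back a wrapper so calls keep chaining. A minimal sketch of that pattern with a hypothetical TextRDD class:

// Hypothetical fluent wrapper around a JavaRDD<String>.
public final class TextRDD {
  private final JavaRDD<String> rdd;

  public TextRDD(JavaRDD<String> rdd) {
    this.rdd = rdd;
  }

  // Delegate to the wrapped RDD, then return the wrapper so callers can
  // keep chaining: new TextRDD(lines).cache().count()
  public TextRDD cache() {
    rdd.cache();
    return this;
  }

  public long count() {
    return rdd.count();
  }
}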
/**
 * Cache the result RDD, since K-Means is an iterative machine learning
 * algorithm and the result will be used many times.
 *
 * @param wikiData path to the featurized data
 * @param context a Java Spark context object
 * @return JavaPairRDD<String, Vector>, where K is <project_code> + " " + <page_title>
 *         and V is a Vector of features
 */
static JavaPairRDD<String, Vector> getFeatureizedData(String wikiData, JavaSparkContext context) {
  return context.textFile(wikiData).mapToPair(
      (PairFunction<String, String, Vector>) in -> {
        // in: <key><#><feature_1><,><feature_2><,>...<,><feature_24>
        String[] parts = StringUtils.split(in, "#");
        return new Tuple2<>(parts[0], Util.buildVector(parts[1], ","));
      }).cache();
}
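A hedged usage sketch for the method above: the cached vectors feed an iterative MLlib K-Means fit, so every pass over the data reads memory instead of re-parsing the text file (assumes org.apache.spark.mllib.clustering.KMeans and MLlib's Vector; k and the iteration count are illustrative):

JavaPairRDD<String, Vector> data = getFeatureizedData(wikiData, context);
// K-Means sweeps the same vectors once per iteration; the cache() inside
// getFeatureizedData keeps them from being recomputed on each sweep.
KMeansModel model = KMeans.train(data.values().rdd(), 10, 20);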
instances.cache();
// Fragment restored to a full statement; `lines` is the assumed input RDD of
// "<source> <target>" pairs, as in Spark's JavaPageRank example.
JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
  String[] parts = SPACES.split(s);
  return new Tuple2<>(parts[0], parts[1]);
}).distinct().groupByKey().cache();
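That links table is the textbook candidate for caching: a PageRank-style job joins it against fresh ranks on every iteration. A sketch of such a loop, under the assumption that links was built as above (iteration count and damping factor are the usual illustrative values; assumes java.util.ArrayList, java.util.List, and scala.Tuple2 imports):

JavaPairRDD<String, Double> ranks = links.mapValues(v -> 1.0);
for (int i = 0; i < 10; i++) {
  // Each pass re-reads the cached `links` instead of re-parsing the input.
  ranks = links.join(ranks).values()
      .flatMapToPair(pair -> {
        int fanOut = 0;
        for (String ignored : pair._1()) {
          fanOut++;
        }
        List<Tuple2<String, Double>> contribs = new ArrayList<>();
        for (String dest : pair._1()) {
          contribs.add(new Tuple2<>(dest, pair._2() / fanOut));
        }
        return contribs.iterator();
      })
      .reduceByKey(Double::sum)
      .mapValues(sum -> 0.15 + 0.85 * sum);
}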
long numVariables = oneFullAssignment.cache().count();
config.endLog("[gaussianFitApp] oneFullAssignment");
constraints.A().cache().count();
constraints.B().cache().count();
long totalNumConstraints = constraints.C().cache().count();
config.endLog("[gaussianFitApp] constraints generation");
System.out.println("[numconstraints: " + totalNumConstraints + "]");
long numVariables2 = oneFullAssignment.cache().count();
config.endLog("[matmulApp] oneFullAssignment");
constraints.A().cache().count();
constraints.B().cache().count();
long totalNumConstraints = constraints.C().cache().count();
config.endLog("[matmulApp] constraints generation");
System.out.println("[numconstraints: " + totalNumConstraints + "]");
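Both blocks above lean on the same materialization trick: cache() is lazy, so a cheap count() action is issued purely to force the computation inside the timed region. A minimal sketch of the idiom (input, slowTransform, and the timing style are illustrative):

JavaRDD<Double> expensive = input.map(x -> slowTransform(x)).cache();
long start = System.nanoTime();
expensive.count(); // forces evaluation and populates the cache
System.out.println("materialized in " + (System.nanoTime() - start) / 1e9 + " s");
// Later actions on `expensive` read the cached partitions instead of recomputing.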
rdd.getRDD().cache();