org.apache.spark.api.java.JavaPairRDD.values Java code examples

/**
 * For each token, counts the distinct other tokens that appear with it on at least one line.
 *
 * @param data pairs whose values are lines of tokens separated by a single space; keys are unused
 * @return map from token to the number of distinct other tokens it shares a line with
 */
public static Map<String,Integer> countDistinctOtherWords(JavaPairRDD<String,String> data) {
 // Every ordered (word, otherWord) pair of different tokens on the same line, de-duplicated
 JavaPairRDD<String,String> coOccurrences = data.values().flatMapToPair(line -> {
  Set<String> words = new HashSet<>(Arrays.asList(line.split(" ")));
  return words.stream()
    .flatMap(word -> words.stream()
      .filter(other -> !word.equals(other))
      .map(other -> new Tuple2<>(word, other)))
    .iterator();
 }).distinct();
 // Each distinct companion contributes 1; sum per word and bring the result to the driver
 return coOccurrences
   .mapValues(other -> 1)
   .reduceByKey((left, right) -> left + right)
   .collectAsMap();
}

/**
 * Reads a sequence file of (BytesWritable, Text) records and splits each Text value into fields.
 *
 * @param sc Spark context used to read the file
 * @param inputPath path of the sequence file
 * @return one String array per record, split on
 *  {@code BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER}
 */
private static JavaRDD<String[]> getSequenceFormatHiveInput(JavaSparkContext sc, String inputPath) {
  Function<Text, String[]> splitRow = new Function<Text, String[]>() {
    @Override
    public String[] call(Text row) throws Exception {
      // Decode only the first getLength() bytes of the array returned by getBytes()
      final String decoded = Bytes.toString(row.getBytes(), 0, row.getLength());
      // Limit -1 keeps trailing empty fields
      return decoded.split(BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER, -1);
    }
  };
  return sc.sequenceFile(inputPath, BytesWritable.class, Text.class).values().map(splitRow);
}

/**
 * Collects every value of {@code newData}; the keys are discarded.
 *
 * @param newData keyed input data
 * @return the collected values
 */
@Override
public Iterable<String> buildUpdates(JavaPairRDD<String,String> newData) {
 Iterable<String> updates = newData.values().collect();
 return updates;
}

/**
 * @param evalData data for evaluation
 * @return the sum, over all cluster metrics, of {@code ClusterMetric#getSumSquaredDist()}
 */
@Override
double evaluate(JavaRDD<Vector> evalData) {
 double totalSquaredDist = fetchClusterMetrics(evalData)
   .values()
   .mapToDouble(metric -> metric.getSumSquaredDist())
   .sum();
 return totalSquaredDist;
}

/**
 * Computes the ratio of the smallest distance between two cluster centers to the largest
 * mean distance of a cluster's points to its center.
 *
 * @param evalData data for evaluation
 * @return the Dunn Index of a given clustering
 *  (https://en.wikipedia.org/wiki/Cluster_analysis#Internal_evaluation); higher is better
 */
@Override
double evaluate(JavaRDD<Vector> evalData) {
 // Denominator: largest intra-cluster distance, taken as the mean distance to centroid
 double largestMeanDist = fetchClusterMetrics(evalData)
   .values()
   .mapToDouble(metric -> metric.getMeanDist())
   .max();

 // Numerator: smallest inter-cluster distance, measured between centroids
 List<ClusterInfo> allClusters = new ArrayList<>(getClustersByID().values());
 DistanceFn<double[]> centerDistance = getDistanceFn();
 int numClusters = allClusters.size();
 double smallestSeparation = Double.POSITIVE_INFINITY;
 for (int first = 0; first < numClusters; first++) {
  double[] firstCenter = allClusters.get(first).getCenter();
  // d(a,b) == d(b,a), so each unordered pair is visited once
  for (int second = first + 1; second < numClusters; second++) {
   double[] secondCenter = allClusters.get(second).getCenter();
   double separation = centerDistance.applyAsDouble(firstCenter, secondCenter);
   smallestSeparation = Math.min(smallestSeparation, separation);
  }
 }
 return smallestSeparation / largestMeanDist;
}

// Keep just the values of the keyed data
JavaRDD<M> newData = newKeyMessageData.values();
// The past data may be absent, in which case null is carried through
JavaRDD<M> pastData = pastKeyMessageData != null ? pastKeyMessageData.values() : null;

// Order the pairs by key, then return only the values in that order
// (presumably the keys are timestamps, going by the variable name -- confirm against the caller)
return timestampRatingRDD.sortByKey().values();

/**
 * Computes root mean squared error of {@link Rating#rating()} versus predicted value.
 *
 * @param mfModel model asked to predict a rating for every (user, product) in the test data
 * @param testData ratings to compare the predictions against
 * @return square root of the mean squared difference between predicted and test rating
 */
static double rmse(MatrixFactorizationModel mfModel, JavaRDD<Rating> testData) {
 // Test ratings keyed by (user, product)
 JavaPairRDD<Tuple2<Integer,Integer>,Double> actualByUserProduct = testData.mapToPair(
   r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

 // predict() wants an RDD of (Object,Object) pairs; the cast goes through RDD<?> to be legal
 @SuppressWarnings("unchecked")
 RDD<Tuple2<Object,Object>> userProducts =
   (RDD<Tuple2<Object,Object>>) (RDD<?>) actualByUserProduct.keys().rdd();
 JavaRDD<Rating> predicted = testData.wrapRDD(mfModel.predict(userProducts));

 // Predictions keyed the same way, so they can be joined to the test ratings
 JavaPairRDD<Tuple2<Integer,Integer>,Double> predictedByUserProduct = predicted.mapToPair(
   r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

 double meanSquaredError = predictedByUserProduct
   .join(actualByUserProduct)
   .values()
   .mapToDouble(predictedAndActual -> {
    double error = predictedAndActual._1() - predictedAndActual._2();
    return error * error;
   })
   .mean();
 return Math.sqrt(meanSquaredError);
}

/** Checks that {@code zipWithUniqueId()} gives four elements four distinct ids. */
@Test
public void zipWithUniqueId() {
 JavaPairRDD<Integer, Long> withIds = sc.parallelize(Arrays.asList(1, 2, 3, 4)).zipWithUniqueId();
 // Duplicated ids would collapse in the set and shrink its size below 4
 HashSet<Long> distinctIds = new HashSet<>(withIds.values().collect());
 assertEquals(4, distinctIds.size());
}

/** Checks that the ids produced by {@code zipWithUniqueId()} for four elements are all different. */
@Test
public void zipWithUniqueId() {
 List<Integer> input = Arrays.asList(1, 2, 3, 4);
 JavaRDD<Long> ids = sc.parallelize(input).zipWithUniqueId().values();
 // Count the ids after de-duplication; it must match the number of input elements
 int distinctCount = new HashSet<>(ids.collect()).size();
 assertEquals(4, distinctCount);
}

/** Checks that no two of the four elements receive the same id from {@code zipWithUniqueId()}. */
@Test
public void zipWithUniqueId() {
 List<Integer> elements = Arrays.asList(1, 2, 3, 4);
 JavaPairRDD<Integer, Long> elementToId = sc.parallelize(elements).zipWithUniqueId();
 List<Long> collectedIds = elementToId.values().collect();
 // A set keeps one copy of each id, so its size is the number of distinct ids
 assertEquals(4, new HashSet<>(collectedIds).size());
}

return newData.values().map(MLFunctions.PARSE_FN).mapToPair(data -> {
 try {
  double[] featureVector = KMeansUtils.featuresFromTokens(data, inputSchema);

// Apply MLFunctions.PARSE_FN to each value of newData, then turn each parsed record into an
// Example using the input schema and the value encodings
JavaRDD<Example> examplesRDD = newData.values().map(MLFunctions.PARSE_FN).
  map(data -> ExampleUtils.dataToExample(data, inputSchema, valueEncodings));

/** Checks that {@code zipWithIndex()} assigns the indexes 0 through 3 to four elements, in order. */
@Test
public void zipWithIndex() {
 JavaRDD<Long> positions = sc.parallelize(Arrays.asList(1, 2, 3, 4)).zipWithIndex().values();
 assertEquals(Arrays.asList(0L, 1L, 2L, 3L), positions.collect());
}

/** Checks that the values produced by {@code zipWithIndex()} are the element positions. */
@Test
public void zipWithIndex() {
 List<Long> expected = Arrays.asList(0L, 1L, 2L, 3L);
 List<Integer> input = Arrays.asList(1, 2, 3, 4);
 JavaPairRDD<Integer, Long> indexed = sc.parallelize(input).zipWithIndex();
 // The value of each pair is the index given to the element
 List<Long> actual = indexed.values().collect();
 assertEquals(expected, actual);
}

/** Checks that four elements zipped with {@code zipWithIndex()} get indexes 0, 1, 2 and 3. */
@Test
public void zipWithIndex() {
 List<Integer> elements = Arrays.asList(1, 2, 3, 4);
 JavaPairRDD<Integer, Long> elementToIndex = sc.parallelize(elements).zipWithIndex();
 List<Long> collectedIndexes = elementToIndex.values().collect();
 // Indexes are expected in element order, starting at zero
 assertEquals(Arrays.asList(0L, 1L, 2L, 3L), collectedIndexes);
}

  newData.values().sortBy(MLFunctions.TO_TIMESTAMP_FN, true, newData.partitions().size());
JavaPairRDD<Tuple2<String,String>,Double> tuples = sortedValues.mapToPair(line -> {
 try {

String distScriptName = "finddistance.R";
sc.addFile(distScript);
JavaRDD<String> pipeInputs = contactsContactLists.values().map(new VerifyCallLogs()).flatMap(
 new FlatMapFunction<CallLog[], String>() { public Iterable<String> call(CallLog[] calls) {
   ArrayList<String> latLons = new ArrayList<String>();

Broadcast<List<Integer>> allItemIDsBC = sparkContext.broadcast(positiveUserProducts.values().distinct().collect());
  predictAll(mfModel, positiveData, negativeUserProducts);
return positivePredictions.join(negativePredictions).values().mapToDouble(t -> {

final JavaRDD<Traverser.Admin<Object>> nextRDD = inputRDD.values()

Popular methods of JavaPairRDD

Popular in Java

Start an intent from android
scheduleAtFixedRate (ScheduledExecutorService)
requestLocationUpdates (LocationManager)
putExtra (Intent)
BufferedInputStream (java.io)
A BufferedInputStream adds functionality to another input stream, namely, the ability to buffer the i
ArrayList (java.util)
ArrayList is an implementation of List, backed by an array. All optional operations including adding
BitSet (java.util)
The BitSet class implements a bit array [http://en.wikipedia.org/wiki/Bit_array]. Each element is eit
GregorianCalendar (java.util)
GregorianCalendar is a concrete subclass of Calendar and provides the standard calendar used by most
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
IOUtils (org.apache.commons.io)
General IO stream manipulation utilities. This class provides static utility methods for input/outpu
Best plugins for Eclipse

How to use the values method in org.apache.spark.api.java.JavaPairRDD

Best Java code snippets using org.apache.spark.api.java.JavaPairRDD.values (Showing top 20 results out of 342)

How to use the values method in org.apache.spark.api.java.JavaPairRDD