private JavaPairRDD<Integer, Iterable<double[]>> fetchClusteredPoints(JavaRDD<? extends Vector> evalData) {
  return evalData.mapToPair(vector -> {
    double closestDist = Double.POSITIVE_INFINITY;
    int minClusterID = Integer.MIN_VALUE;
    double[] vec = vector.toArray();
    DistanceFn<double[]> distanceFn = getDistanceFn();
    Map<Integer,ClusterInfo> clusters = getClustersByID();
    for (ClusterInfo cluster : clusters.values()) {
      double distance = distanceFn.applyAsDouble(cluster.getCenter(), vec);
      if (distance < closestDist) {
        closestDist = distance;
        minClusterID = cluster.getID();
      }
    }
    Preconditions.checkState(!Double.isInfinite(closestDist) && !Double.isNaN(closestDist));
    return new Tuple2<>(minClusterID, vec);
  }).groupByKey();
}
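// DistanceFn is referenced above but not defined in this excerpt. A minimal sketch,
// assuming it is simply a serializable two-argument distance function over double[]
// points; the interface shape and the lambda below are illustrative assumptions.
interface DistanceFn<T> extends java.util.function.ToDoubleBiFunction<T,T>, java.io.Serializable {}

// Squared Euclidean distance is a natural choice for k-means-style assignment:
// it is monotone in true Euclidean distance, so it selects the same nearest cluster.
DistanceFn<double[]> squaredEuclidean = (a, b) -> {
  double total = 0.0;
  for (int i = 0; i < a.length; i++) {
    double diff = a[i] - b[i];
    total += diff * diff;
  }
  return total;
};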
/**
 * @param evalData points to cluster for evaluation
 * @return cluster IDs as keys, and metrics for each cluster like the count, sum of
 *  distances to centroid, and sum of squared distances
 */
JavaPairRDD<Integer,ClusterMetric> fetchClusterMetrics(JavaRDD<Vector> evalData) {
  return evalData.mapToPair(vector -> {
    double closestDist = Double.POSITIVE_INFINITY;
    int minClusterID = Integer.MIN_VALUE;
    double[] vec = vector.toArray();
    // 'clusters' and 'distanceFn' are not declared locally here (unlike the method
    // above), so they are presumably instance fields of the enclosing class.
    for (ClusterInfo cluster : clusters.values()) {
      double distance = distanceFn.applyAsDouble(cluster.getCenter(), vec);
      if (distance < closestDist) {
        closestDist = distance;
        minClusterID = cluster.getID();
      }
    }
    Preconditions.checkState(!Double.isInfinite(closestDist) && !Double.isNaN(closestDist));
    return new Tuple2<>(minClusterID, new ClusterMetric(1L, closestDist, closestDist * closestDist));
  }).reduceByKey(ClusterMetric::add);
}
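// ClusterMetric is not shown in this excerpt. A minimal sketch consistent with its use
// above: each point contributes (1, distance, distance^2), and per-cluster metrics
// combine by addition in reduceByKey. The field and method names are assumptions.
final class ClusterMetric implements java.io.Serializable {
  private final long count;
  private final double sumDist;
  private final double sumSquaredDist;

  ClusterMetric(long count, double sumDist, double sumSquaredDist) {
    this.count = count;
    this.sumDist = sumDist;
    this.sumSquaredDist = sumSquaredDist;
  }

  ClusterMetric add(ClusterMetric other) {
    return new ClusterMetric(count + other.count,
                             sumDist + other.sumDist,
                             sumSquaredDist + other.sumSquaredDist);
  }
}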
// Truncated fragment of a per-tree variant: one IntLongHashMap is allocated per tree,
// then each example is run down every tree. The stream source and variable name below
// are reconstructed, since the original snippet begins mid-expression; the node-walking
// loop presumably continues as in predictorExampleCounts below.
List<IntLongHashMap> treeNodeIDCounts = IntStream.range(0, trees.length)
    .mapToObj(i -> new IntLongHashMap())
    .collect(Collectors.toList());
data.forEachRemaining(datum -> {
  double[] featureVector = datum.features().toArray();
  for (int i = 0; i < trees.length; i++) {
    DecisionTreeModel tree = trees[i];
/**
 * @param trainPointData data to run down trees
 * @param model random decision forest model to count on
 * @return map of predictor index to the number of training examples that reached a
 *  node whose decision is based on that feature. The index is among predictors, not all
 *  features, since there are fewer predictors than features. That is, the index will
 *  match the one used in the {@link RandomForestModel}.
 */
private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData,
                                                     RandomForestModel model) {
  return trainPointData.mapPartitions(data -> {
    IntLongHashMap featureIndexCount = new IntLongHashMap();
    data.forEachRemaining(datum -> {
      double[] featureVector = datum.features().toArray();
      for (DecisionTreeModel tree : model.trees()) {
        org.apache.spark.mllib.tree.model.Node node = tree.topNode();
        // This logic cloned from Node.predict:
        while (!node.isLeaf()) {
          Split split = node.split().get();
          int featureIndex = split.feature();
          // Count feature
          featureIndexCount.addToValue(featureIndex, 1);
          node = nextNode(featureVector, node, split, featureIndex);
        }
      }
    });
    return Collections.singleton(featureIndexCount).iterator();
  }).reduce(RDFUpdate::merge);
}
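// The nextNode and merge helpers are referenced above but not shown. A plausible
// sketch, assuming nextNode mirrors the split handling in Spark's Node.predict and
// merge folds one Eclipse Collections IntLongHashMap into another; both are
// illustrative, not the original implementations.
import org.apache.spark.mllib.tree.configuration.FeatureType;
import org.apache.spark.mllib.tree.model.Node;
import org.apache.spark.mllib.tree.model.Split;
import org.eclipse.collections.impl.map.mutable.primitive.IntLongHashMap;

private static Node nextNode(double[] featureVector, Node node, Split split, int featureIndex) {
  double featureValue = featureVector[featureIndex];
  if (split.featureType().equals(FeatureType.Continuous())) {
    // Continuous split: go left iff the value is at or below the threshold.
    return featureValue <= split.threshold() ? node.leftNode().get() : node.rightNode().get();
  }
  // Categorical split: go left iff the value is among the left-side categories.
  return split.categories().contains(featureValue) ? node.leftNode().get() : node.rightNode().get();
}

private static IntLongHashMap merge(IntLongHashMap a, IntLongHashMap b) {
  // Fold the smaller map into the larger one to minimize work.
  if (b.size() > a.size()) {
    return merge(b, a);
  }
  b.forEachKeyValue(a::addToValue);
  return a;
}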
private ClusteringModel pmmlClusteringModel(KMeansModel model, Map<Integer,Long> clusterSizesMap) {
  Vector[] clusterCenters = model.clusterCenters();
  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
    if (inputSchema.isActive(i)) {
      FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(i));
      ClusteringField clusteringField =
          new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE);
      clusteringFields.add(clusteringField);
    }
  }
  List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
  for (int i = 0; i < clusterCenters.length; i++) {
    clusters.add(new Cluster().setId(Integer.toString(i))
                     .setSize(clusterSizesMap.get(i).intValue())
                     .setArray(AppPMMLUtils.toArray(clusterCenters[i].toArray())));
  }
  return new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      AppPMMLUtils.buildMiningSchema(inputSchema),
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE).setMeasure(new SquaredEuclidean()),
      clusteringFields,
      clusters);
}
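// One plausible way for a caller to produce the clusterSizesMap argument (illustrative;
// the actual caller is not part of this excerpt, and 'trainData' is an assumed
// JavaRDD<Vector>): assign each training point to its nearest centroid with
// KMeansModel.predict, then tally the resulting cluster IDs.
Map<Integer,Long> clusterSizesMap = trainData.map(model::predict).countByValue();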
static void debug(String record, Vector v) {
  THE_LOGGER.info("DEBUG started:");
  double[] d = v.toArray();
  StringBuilder builder = new StringBuilder();
  builder.append("DEBUG[record=");
  builder.append(record);
  builder.append("]:");
  for (int i = 0; i < d.length; i++) {
    builder.append("\t");
    builder.append(d[i]);
  }
  THE_LOGGER.info(builder.toString());
}
@Override
public StandardScalerModelInfo getModelInfo(final StandardScalerModel from, final DataFrame df) {
  final StandardScalerModelInfo modelInfo = new StandardScalerModelInfo();
  modelInfo.setMean(from.mean().toArray());
  modelInfo.setStd(from.std().toArray());
  modelInfo.setWithMean(from.getWithMean());
  modelInfo.setWithStd(from.getWithStd());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(from.getInputCol());
  modelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(from.getOutputCol());
  modelInfo.setOutputKeys(outputKeys);
  return modelInfo;
}
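// The exported mean/std arrays let a serving layer replay the scaling without Spark.
// A minimal sketch (this helper is illustrative, not part of the exporter): it applies
// StandardScaler's definition, optionally centering by the mean and dividing by the std.
static double[] standardize(double[] features, double[] mean, double[] std,
                            boolean withMean, boolean withStd) {
  double[] out = new double[features.length];
  for (int i = 0; i < features.length; i++) {
    double v = withMean ? features[i] - mean[i] : features[i];
    out[i] = (withStd && std[i] != 0.0) ? v / std[i] : v;
  }
  return out;
}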
@Override
public MinMaxScalerModelInfo getModelInfo(final MinMaxScalerModel from, final DataFrame df) {
  final MinMaxScalerModelInfo modelInfo = new MinMaxScalerModelInfo();
  modelInfo.setOriginalMax(from.originalMax().toArray());
  modelInfo.setOriginalMin(from.originalMin().toArray());
  modelInfo.setMax(from.getMax());
  modelInfo.setMin(from.getMin());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(from.getInputCol());
  modelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(from.getOutputCol());
  modelInfo.setOutputKeys(outputKeys);
  return modelInfo;
}
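// Likewise for MinMaxScaler: the exported bounds allow serving-time rescaling via
// ((x - originalMin) / (originalMax - originalMin)) * (max - min) + min. This helper
// is an illustrative sketch; per Spark's definition, a constant feature is mapped to
// the midpoint of [min, max].
static double[] rescale(double[] features, double[] originalMin, double[] originalMax,
                        double min, double max) {
  double[] out = new double[features.length];
  for (int i = 0; i < features.length; i++) {
    double range = originalMax[i] - originalMin[i];
    double ratio = range == 0.0 ? 0.5 : (features[i] - originalMin[i]) / range;
    out[i] = ratio * (max - min) + min;
  }
  return out;
}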
@Test
public void sparseArrayConstruction() {
  @SuppressWarnings("unchecked")
  Vector v = Vectors.sparse(3, Arrays.asList(
      new Tuple2<>(0, 2.0),
      new Tuple2<>(2, 3.0)));
  assertArrayEquals(new double[]{2.0, 0.0, 3.0}, v.toArray(), 0.0);
}
@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel, DataFrame df) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.weights().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold((double) sparkLRModel.getThreshold().get());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add("features");
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add("prediction");
  outputKeys.add("probability");
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}
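// The exported weights, intercept, and threshold are enough to replay binary scoring
// outside Spark. A minimal sketch (illustrative helper, not part of the exporter):
// the positive-class probability is sigmoid(w . x + intercept), and comparing it
// against the threshold yields the 0/1 prediction.
static double probability(double[] weights, double intercept, double[] features) {
  double margin = intercept;
  for (int i = 0; i < weights.length; i++) {
    margin += weights[i] * features[i];
  }
  return 1.0 / (1.0 + Math.exp(-margin));
}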
// Variant of the exporter above without the DataFrame parameter; the body is identical.
@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.weights().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold((double) sparkLRModel.getThreshold().get());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add("features");
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add("prediction");
  outputKeys.add("probability");
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}
@Test
public void denseArrayConstruction() {
  Vector v = Vectors.dense(1.0, 2.0, 3.0);
  assertArrayEquals(new double[]{1.0, 2.0, 3.0}, v.toArray(), 0.0);
}
// spark.ml variant of the exporter: coefficients() replaces the mllib weights(), and
// the input/output column names come from the model's params instead of hard-coded strings.
@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel, DataFrame df) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.coefficients().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold(sparkLRModel.getThreshold());
  logisticRegressionModelInfo.setProbabilityKey(sparkLRModel.getProbabilityCol());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(sparkLRModel.getFeaturesCol());
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(sparkLRModel.getPredictionCol());
  outputKeys.add(sparkLRModel.getProbabilityCol());
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}
@Test
public void twoDimensionalData() {
  JavaRDD<Vector> points = jsc.parallelize(Arrays.asList(
      Vectors.dense(4, -1),
      Vectors.dense(4, 1),
      Vectors.sparse(2, new int[]{0}, new double[]{1.0})
  ), 2);
  BisectingKMeans bkm = new BisectingKMeans()
      .setK(4)
      .setMaxIterations(2)
      .setSeed(1L);
  BisectingKMeansModel model = bkm.run(points);
  Assert.assertEquals(3, model.k());
  Assert.assertArrayEquals(new double[]{3.0, 0.0}, model.root().center().toArray(), 1e-12);
  for (ClusteringTreeNode child : model.root().children()) {
    double[] center = child.center().toArray();
    if (center[0] > 2) {
      Assert.assertEquals(2, child.size());
      Assert.assertArrayEquals(new double[]{4.0, 0.0}, center, 1e-12);
    } else {
      Assert.assertEquals(1, child.size());
      Assert.assertArrayEquals(new double[]{1.0, 0.0}, center, 1e-12);
    }
  }
}
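// Usage sketch: beyond inspecting the tree, the fitted model can assign new points to
// one of the learned leaf clusters (BisectingKMeansModel.predict returns the leaf index).
int clusterIndex = model.predict(Vectors.dense(4.0, 0.5));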