// Anonymous Spark Function mapping one raw e-mail string to a training example.
// NOTE(review): the enclosing anonymous-class header lies outside this fragment.
@Override
public LabeledPoint call(String email) {
  // Label 1 — presumably the positive (spam) class; verify against the caller.
  // Tokenize on single spaces and hash the terms into a term-frequency vector via HashingTF.
  return new LabeledPoint(1, tf.transform(Arrays.asList(email.split(" "))));
}
});
// NOTE(review): fragment — the stream pipeline continues from, and the loop body
// continues into, code outside this view; do not assume anything beyond these lines.
mapToObj(i -> new IntLongHashMap()).collect(Collectors.toList());
data.forEachRemaining(datum -> {
  // Dense copy of this example's feature vector, walked once per tree below.
  double[] featureVector = datum.features().toArray();
  for (int i = 0; i < trees.length; i++) {
    DecisionTreeModel tree = trees[i];
// NOTE(review): fragment — the result of train(...) is presumably bound to `model`
// on a line outside this view (the bare call here discards its return); confirm.
LinearRegressionWithSGD.train(JavaRDD.toRDD(trainingData), numberOfIterations, stepSize);
THE_LOGGER.info("LinearRegressionModel weights: " + model.weights());
THE_LOGGER.info("LinearRegressionModel intercept: " + model.intercept());
// Persist the trained model so later jobs can reload it with LinearRegressionModel.load().
model.save(context.sc(), builtModelPath);
THE_LOGGER.info("model saved at: builtModelPath=" + builtModelPath);
// Anonymous Spark Function pairing each input record with its predicted car price.
@Override
public Tuple2<String, Double> call(String record) {
  // each record has this format:
  // <Age><,><KM><,><FuelType1><,><FuelType2><,><HP><,><MetColor><,><Automatic><,><CC><,><Doors><,><Weight>
  String[] tokens = StringUtils.split(record, ",");
  // All tokens are numeric features here (the price/label is not part of this record).
  double[] features = new double[tokens.length];
  for (int i = 0; i < features.length; i++) {
    features[i] = Double.parseDouble(tokens[i]);
  }
  //
  double carPricePrediction = model.predict(Vectors.dense(features));
  // Keep the original record alongside the prediction for downstream output.
  return new Tuple2<String, Double>(record, carPricePrediction);
}
});
// Reload a previously persisted LinearRegressionModel from savedModelPath.
final LinearRegressionModel model = LinearRegressionModel.load(context.sc(), savedModelPath);
private static int validatePrediction( List<LabeledPoint> validationData, LinearRegressionModel model) { int numAccurate = 0; for (LabeledPoint point : validationData) { Double prediction = model.predict(point.features()); // A prediction is off if the prediction is more than 0.5 away from expected value. if (Math.abs(prediction - point.label()) <= 0.5) { numAccurate++; } } return numAccurate; }
// Anonymous Spark Function mapping one raw e-mail string to a training example.
// NOTE(review): the enclosing anonymous-class header lies outside this fragment.
@Override
public LabeledPoint call(String email) {
  // Label 0 — presumably the negative (non-spam/ham) class; verify against the caller.
  return new LabeledPoint(0, tf.transform(Arrays.asList(email.split(" "))));
}
});
/**
 * @param trainPointData data to run down trees
 * @param model random decision forest model to count on
 * @return map of predictor index to the number of training examples that reached a
 *  node whose decision is based on that feature. The index is among predictors, not all
 *  features, since there are fewer predictors than features. That is, the index will
 *  match the one used in the {@link RandomForestModel}.
 */
private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData,
                                                     RandomForestModel model) {
  return trainPointData.mapPartitions(data -> {
    // One counter per partition; partial counts are combined by reduce() below.
    IntLongHashMap featureIndexCount = new IntLongHashMap();
    data.forEachRemaining(datum -> {
      double[] featureVector = datum.features().toArray();
      for (DecisionTreeModel tree : model.trees()) {
        org.apache.spark.mllib.tree.model.Node node = tree.topNode();
        // This logic cloned from Node.predict:
        // walk from the root to a leaf, bumping the count of every split
        // feature encountered along this example's path.
        while (!node.isLeaf()) {
          Split split = node.split().get();
          int featureIndex = split.feature();
          // Count feature
          featureIndexCount.addToValue(featureIndex, 1);
          node = nextNode(featureVector, node, split, featureIndex);
        }
      }
    });
    // Wrap the per-partition map in a single-element iterator as mapPartitions requires.
    return Collections.singleton(featureIndexCount).iterator();
  }).reduce(RDFUpdate::merge);
}
// Reload a previously persisted LinearRegressionModel from savedModelPath.
final LinearRegressionModel model = LinearRegressionModel.load(context.sc(), savedModelPath);
// NOTE(review): fragment — the enclosing try block (where `target`, `features`
// and `data` are built) and the rest of the catch body are outside this view.
return new LabeledPoint(target, Vectors.dense(features));
} catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
  // Malformed rows are logged here; what happens after the warn (skip vs. rethrow)
  // is not visible in this fragment — confirm before relying on either.
  log.warn("Bad input: {}", Arrays.toString(data));
// Anonymous Spark Function building a LabeledPoint with a caller-supplied class label.
@Override
public LabeledPoint call(String email) {
  // `label` is captured from the enclosing scope (not visible in this fragment).
  return new LabeledPoint(label, tf.transform(Arrays.asList(email.split(" "))));
}
});
@Override public LabeledPoint call(String record) { // record: <Price><,><Age><,><KM><,><FuelType1><,><FuelType2><,><HP><,><MetColor><,><Automatic><,><CC><,><Doors><,><Weight> // tokens[0] = <Price> String[] tokens = StringUtils.split(record, ","); double[] features = new double[tokens.length - 1]; for (int i = 0; i < features.length; i++) { features[i] = Double.parseDouble(tokens[i+1]); } // double price = Double.parseDouble(tokens[0]); return new LabeledPoint(price, Vectors.dense(features)); } });
@Override public LabeledPoint call(String record) { String[] tokens = StringUtils.split(record, ","); // 32 tokens double[] features = new double[30]; for (int i = 2; i < features.length; i++) { features[i - 2] = Double.parseDouble(tokens[i]); } // String patientID = tokens[0]; // ignore, not used String outcomeClass = tokens[1]; // B=benign, M=malignant Vector v = new DenseVector(features); if (outcomeClass.equals("B")) { return new LabeledPoint(1, v); // benign } else { return new LabeledPoint(0, v); // malignant } } });
@Override public LabeledPoint call(String record) { // 9 tokens, the last token is the classification String[] tokens = StringUtils.split(record, ","); double[] features = new double[8]; for (int i=0; i < 8; i++) { features[i] = Double.parseDouble(tokens[i]); } // // tokens[8] => classification: // class value 1 is interpreted as "tested positive for diabetes" // double classification = Double.parseDouble(tokens[8]); Vector v = new DenseVector(features); // debug(record, v); // add a classification for the training data set return new LabeledPoint(classification, v); } });
// NOTE(review): fragment — the method building `classification` and `vector`
// is outside this view.
THE_LOGGER.info("training data: classification=" + classification);
return new LabeledPoint(classification, vector);
// Anonymous Spark Function turning one "play tennis" record (space-separated)
// into a LabeledPoint; the getXxx helpers (defined elsewhere in this file)
// encode each categorical token as a numeric value.
@Override
public LabeledPoint call(String record) {
  String[] tokens = StringUtils.split(record, " "); // 5 tokens
  double[] features = new double[4];
  features[0] = getOutlook(tokens[0]); // outlook
  features[1] = getTemperature(tokens[1]); // temperature
  features[2] = getHumidity(tokens[2]); // humidity
  features[3] = getWind(tokens[3]); // windy
  // tokens[4] => classification: play=0 or not-play=1
  double classification = getPlay(tokens[4]);
  Vector v = new DenseVector(features);
  debug(record, v);
  // add a classification for the training data set
  return new LabeledPoint(classification, v);
}
});