/**
 * Maps the fitted GLM's family to a PMML mining function: a "binomial"
 * family is treated as classification, every other family as regression.
 */
@Override
public MiningFunction getMiningFunction() {
  GeneralizedLinearRegressionModel model = getTransformer();
  if ("binomial".equals(model.getFamily())) {
    return MiningFunction.CLASSIFICATION;
  }
  return MiningFunction.REGRESSION;
}
@Override public void execute() { LinearRegression lr = new LinearRegression() .setMaxIter(numIterations) .setRegParam(regularization) .setElasticNetParam(elasticNet); LinearRegressionModel lrModel = lr.fit(training); + lrModel.coefficients() + " Intercept: " + lrModel.intercept()); LinearRegressionTrainingSummary trainingSummary = lrModel.summary(); System.out.println("numIterations: " + trainingSummary.totalIterations()); System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory())); trainingSummary.residuals().show(); System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError()); System.out.println("r2: " + trainingSummary.r2());
/**
 * Builds an exportable snapshot of a fitted regression tree: the adapted
 * root node, the input column names (features, then label) and the output
 * (prediction) column name.
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeRegressionModel decisionTreeModel) {
  final DecisionTreeModelInfo treeInfo = new DecisionTreeModelInfo();
  // Convert Spark's internal node representation into the exportable form.
  treeInfo.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));
  // LinkedHashSet keeps insertion order: features first, then label.
  final Set<String> inputs = new LinkedHashSet<String>();
  inputs.add(decisionTreeModel.getFeaturesCol());
  inputs.add(decisionTreeModel.getLabelCol());
  treeInfo.setInputKeys(inputs);
  final Set<String> outputs = new LinkedHashSet<String>();
  outputs.add(decisionTreeModel.getPredictionCol());
  treeInfo.setOutputKeys(outputs);
  return treeInfo;
}
// NOTE(review): GBTRegressor test fragment — the lossType loop is not closed within
// this snippet; its remainder lies outside this view.
// Builds the regressor via the fluent API (setMaxDepth appears twice on purpose, per
// the inline comment), then for each supported loss type fits a model and smoke-calls
// its accessors. Reformatted onto multiple lines: collapsed onto one line, the "//"
// comment swallowed the for-loop that follows it.
Dataset<Row> dataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0);
GBTRegressor rf = new GBTRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setSubsamplingRate(1.0)
    .setSeed(1234)
    .setMaxIter(3)
    .setStepSize(0.1)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String lossType : GBTRegressor.supportedLossTypes()) {
  rf.setLossType(lossType);
  GBTRegressionModel model = rf.fit(dataFrame);
  model.transform(dataFrame);
  model.totalNumNodes();
  model.toDebugString();
  model.trees();
  model.treeWeights();
RandomForestRegressor rf = new RandomForestRegressor() .setMaxDepth(2) .setMaxBins(10) .setMinInstancesPerNode(5) .setMinInfoGain(0.0) .setMaxMemoryInMB(256) .setCacheNodeIds(false) .setCheckpointInterval(10) .setSubsamplingRate(1.0) .setSeed(1234) .setNumTrees(3) .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern for (String impurity : RandomForestRegressor.supportedImpurities()) { rf.setImpurity(impurity); for (String featureSubsetStrategy : RandomForestRegressor.supportedFeatureSubsetStrategies()) { rf.setFeatureSubsetStrategy(featureSubsetStrategy); rf.setFeatureSubsetStrategy(strategy); rf.setFeatureSubsetStrategy(strategy); rf.setFeatureSubsetStrategy(strategy); Assert.fail("Expected exception to be thrown for invalid strategies"); } catch (Exception e) { RandomForestRegressionModel model = rf.fit(dataFrame); model.transform(dataFrame);
// NOTE(review): DecisionTreeRegressor test fragment — the impurity loop is not closed
// within this snippet; its remainder lies outside this view.
// Configures the regressor via the fluent API (duplicate setMaxDepth is deliberate per
// the inline comment), then fits and inspects a model for each supported impurity.
// Reformatted onto multiple lines: collapsed onto one line, the "//" comment swallowed
// the for-loop that follows it.
DecisionTreeRegressor dt = new DecisionTreeRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String impurity : DecisionTreeRegressor.supportedImpurities()) {
  dt.setImpurity(impurity);
  DecisionTreeRegressionModel model = dt.fit(dataFrame);
  model.transform(dataFrame);
  model.numNodes();
  model.depth();
  model.toDebugString();
@Test public void linearRegressionWithSetters() { // Set params, train, and check as many params as we can. LinearRegression lr = new LinearRegression() .setMaxIter(10) .setRegParam(1.0).setSolver("l-bfgs"); LinearRegressionModel model = lr.fit(dataset); LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
// Train a linear regression (regularization left at defaults — tune via the
// commented-out setters) and print the training summary plus key model params.
df.show();
LinearRegression lr = new LinearRegression().setMaxIter(20); // .setRegParam(1).setElasticNetParam(1);
LinearRegressionModel model = lr.fit(df);
model.transform(df).show();
LinearRegressionTrainingSummary trainingSummary = model.summary();
System.out.println("numIterations: " + trainingSummary.totalIterations());
System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
trainingSummary.residuals().show();
System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
System.out.println("r2: " + trainingSummary.r2());
double intercept = model.intercept();
// FIX: label was misspelled as "Interesection" — the value printed is the model intercept.
System.out.println("Intercept: " + intercept);
double regParam = model.getRegParam();
// FIX: regParam is the regularization parameter, not a "Regression parameter".
System.out.println("Regularization parameter: " + regParam);
double tol = model.getTol();
System.out.println("Tol: " + tol);
double feature = 7.0; // primitive — the boxed Double here was needless autoboxing
Vector features = Vectors.dense(feature);
double p = model.predict(features);
/**
 * Encodes the fitted Spark LinearRegressionModel as a PMML regression model:
 * copies the schema features and model coefficients, simplifies the
 * regression table, and emits a regression with the model's intercept.
 */
@Override
public RegressionModel encodeModel(Schema schema) {
  LinearRegressionModel model = getTransformer();
  List<Feature> featureList = new ArrayList<>(schema.getFeatures());
  List<Double> coefficientList = new ArrayList<>(VectorUtil.toList(model.coefficients()));
  // Drop redundant terms before building the regression table.
  RegressionTableUtil.simplify(this, null, featureList, coefficientList);
  return RegressionModelUtil.createRegression(featureList, coefficientList, model.intercept(), null, schema);
}
}
/**
 * Encodes the fitted gradient-boosted-trees regressor as a PMML mining model:
 * each tree becomes a segment, combined by a weighted sum of the model's
 * per-tree weights.
 */
@Override
public MiningModel encodeModel(Schema schema) {
  GBTRegressionModel model = getTransformer();
  List<TreeModel> treeModels = TreeModelUtil.encodeDecisionTreeEnsemble(this, schema);
  Segmentation segmentation = MiningModelUtil.createSegmentation(
      Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, Doubles.asList(model.treeWeights()));
  return new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
      .setSegmentation(segmentation);
}
}
// NOTE(review): GBTRegressor test fragment (duplicate of an earlier snippet) — the
// lossType loop is not closed within this snippet; its remainder lies outside this view.
// Builds the regressor via the fluent API (setMaxDepth appears twice on purpose, per
// the inline comment), then for each supported loss type fits a model and smoke-calls
// its accessors. Reformatted onto multiple lines: collapsed onto one line, the "//"
// comment swallowed the for-loop that follows it.
Dataset<Row> dataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0);
GBTRegressor rf = new GBTRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setSubsamplingRate(1.0)
    .setSeed(1234)
    .setMaxIter(3)
    .setStepSize(0.1)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String lossType : GBTRegressor.supportedLossTypes()) {
  rf.setLossType(lossType);
  GBTRegressionModel model = rf.fit(dataFrame);
  model.transform(dataFrame);
  model.totalNumNodes();
  model.toDebugString();
  model.trees();
  model.treeWeights();
// NOTE(review): DecisionTreeRegressor test fragment (duplicate of an earlier snippet) —
// the impurity loop is not closed within this snippet; its remainder lies outside this
// view. Configures the regressor via the fluent API (duplicate setMaxDepth is
// deliberate per the inline comment), then fits and inspects a model for each
// supported impurity. Reformatted onto multiple lines: collapsed onto one line, the
// "//" comment swallowed the for-loop that follows it.
DecisionTreeRegressor dt = new DecisionTreeRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String impurity : DecisionTreeRegressor.supportedImpurities()) {
  dt.setImpurity(impurity);
  DecisionTreeRegressionModel model = dt.fit(dataFrame);
  model.transform(dataFrame);
  model.numNodes();
  model.depth();
  model.toDebugString();
@Test public void linearRegressionWithSetters() { // Set params, train, and check as many params as we can. LinearRegression lr = new LinearRegression() .setMaxIter(10) .setRegParam(1.0).setSolver("l-bfgs"); LinearRegressionModel model = lr.fit(dataset); LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
/**
 * Builds an exportable snapshot of a fitted regression tree: the adapted
 * root node, the input column names (features, then label) and the output
 * (prediction) column name.
 *
 * @param decisionTreeModel fitted Spark decision-tree regression model
 * @param df training DataFrame; unused in this body — presumably kept for
 *           interface compatibility (NOTE(review): confirm against callers)
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeRegressionModel decisionTreeModel, final DataFrame df) {
  final DecisionTreeModelInfo treeInfo = new DecisionTreeModelInfo();
  // Convert Spark's internal node representation into the exportable form.
  treeInfo.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));
  // LinkedHashSet keeps insertion order: features first, then label.
  final Set<String> inputs = new LinkedHashSet<String>();
  inputs.add(decisionTreeModel.getFeaturesCol());
  inputs.add(decisionTreeModel.getLabelCol());
  treeInfo.setInputKeys(inputs);
  final Set<String> outputs = new LinkedHashSet<String>();
  outputs.add(decisionTreeModel.getPredictionCol());
  treeInfo.setOutputKeys(outputs);
  return treeInfo;
}
// NOTE(review): GBTRegressor test fragment (duplicate of an earlier snippet) — the
// lossType loop is not closed within this snippet; its remainder lies outside this view.
// Builds the regressor via the fluent API (setMaxDepth appears twice on purpose, per
// the inline comment), then for each supported loss type fits a model and smoke-calls
// its accessors. Reformatted onto multiple lines: collapsed onto one line, the "//"
// comment swallowed the for-loop that follows it.
Dataset<Row> dataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0);
GBTRegressor rf = new GBTRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setSubsamplingRate(1.0)
    .setSeed(1234)
    .setMaxIter(3)
    .setStepSize(0.1)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String lossType : GBTRegressor.supportedLossTypes()) {
  rf.setLossType(lossType);
  GBTRegressionModel model = rf.fit(dataFrame);
  model.transform(dataFrame);
  model.totalNumNodes();
  model.toDebugString();
  model.trees();
  model.treeWeights();
// NOTE(review): DecisionTreeRegressor test fragment (duplicate of an earlier snippet) —
// the impurity loop is not closed within this snippet; its remainder lies outside this
// view. Configures the regressor via the fluent API (duplicate setMaxDepth is
// deliberate per the inline comment), then fits and inspects a model for each
// supported impurity. Reformatted onto multiple lines: collapsed onto one line, the
// "//" comment swallowed the for-loop that follows it.
DecisionTreeRegressor dt = new DecisionTreeRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String impurity : DecisionTreeRegressor.supportedImpurities()) {
  dt.setImpurity(impurity);
  DecisionTreeRegressionModel model = dt.fit(dataFrame);
  model.transform(dataFrame);
  model.numNodes();
  model.depth();
  model.toDebugString();
@Test public void linearRegressionWithSetters() { // Set params, train, and check as many params as we can. LinearRegression lr = new LinearRegression() .setMaxIter(10) .setRegParam(1.0).setSolver("l-bfgs"); LinearRegressionModel model = lr.fit(dataset); LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }