/**
 * Maps the fitted GLM's family to a PMML mining function: a "binomial"
 * family is treated as classification, every other family as regression.
 */
@Override
public MiningFunction getMiningFunction() {
  GeneralizedLinearRegressionModel model = getTransformer();
  if ("binomial".equals(model.getFamily())) {
    return MiningFunction.CLASSIFICATION;
  }
  return MiningFunction.REGRESSION;
}
@Override public void execute() { LinearRegression lr = new LinearRegression() .setMaxIter(numIterations) .setRegParam(regularization) .setElasticNetParam(elasticNet); LinearRegressionModel lrModel = lr.fit(training); + lrModel.coefficients() + " Intercept: " + lrModel.intercept()); LinearRegressionTrainingSummary trainingSummary = lrModel.summary(); System.out.println("numIterations: " + trainingSummary.totalIterations()); System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory())); trainingSummary.residuals().show(); System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError()); System.out.println("r2: " + trainingSummary.r2());
/**
 * Builds an exportable snapshot of a fitted regression tree: the adapted
 * root node, the input column names (features, then label) and the output
 * (prediction) column name.
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeRegressionModel decisionTreeModel) {
  final DecisionTreeModelInfo treeInfo = new DecisionTreeModelInfo();
  // Convert Spark's internal node representation into the exportable form.
  treeInfo.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));
  // LinkedHashSet keeps insertion order: features first, then label.
  final Set<String> inputs = new LinkedHashSet<String>();
  inputs.add(decisionTreeModel.getFeaturesCol());
  inputs.add(decisionTreeModel.getLabelCol());
  treeInfo.setInputKeys(inputs);
  final Set<String> outputs = new LinkedHashSet<String>();
  outputs.add(decisionTreeModel.getPredictionCol());
  treeInfo.setOutputKeys(outputs);
  return treeInfo;
}
// NOTE(review): GBTRegressor test fragment — the lossType loop is not closed within
// this snippet; its remainder lies outside this view.
// Builds the regressor via the fluent API (setMaxDepth appears twice on purpose, per
// the inline comment), then for each supported loss type fits a model and smoke-calls
// its accessors. Reformatted onto multiple lines: collapsed onto one line, the "//"
// comment swallowed the for-loop that follows it.
Dataset<Row> dataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0);
GBTRegressor rf = new GBTRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setSubsamplingRate(1.0)
    .setSeed(1234)
    .setMaxIter(3)
    .setStepSize(0.1)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String lossType : GBTRegressor.supportedLossTypes()) {
  rf.setLossType(lossType);
  GBTRegressionModel model = rf.fit(dataFrame);
  model.transform(dataFrame);
  model.totalNumNodes();
  model.toDebugString();
  model.trees();
  model.treeWeights();
RandomForestRegressor rf = new RandomForestRegressor() .setMaxDepth(2) .setMaxBins(10) .setMinInstancesPerNode(5) .setMinInfoGain(0.0) .setMaxMemoryInMB(256) .setCacheNodeIds(false) .setCheckpointInterval(10) .setSubsamplingRate(1.0) .setSeed(1234) .setNumTrees(3) .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern for (String impurity : RandomForestRegressor.supportedImpurities()) { rf.setImpurity(impurity); for (String featureSubsetStrategy : RandomForestRegressor.supportedFeatureSubsetStrategies()) { rf.setFeatureSubsetStrategy(featureSubsetStrategy); rf.setFeatureSubsetStrategy(strategy); rf.setFeatureSubsetStrategy(strategy); rf.setFeatureSubsetStrategy(strategy); Assert.fail("Expected exception to be thrown for invalid strategies"); } catch (Exception e) { RandomForestRegressionModel model = rf.fit(dataFrame); model.transform(dataFrame);
// NOTE(review): DecisionTreeRegressor test fragment — the impurity loop is not closed
// within this snippet; its remainder lies outside this view.
// Configures the regressor via the fluent API (duplicate setMaxDepth is deliberate per
// the inline comment), then fits and inspects a model for each supported impurity.
// Reformatted onto multiple lines: collapsed onto one line, the "//" comment swallowed
// the for-loop that follows it.
DecisionTreeRegressor dt = new DecisionTreeRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String impurity : DecisionTreeRegressor.supportedImpurities()) {
  dt.setImpurity(impurity);
  DecisionTreeRegressionModel model = dt.fit(dataFrame);
  model.transform(dataFrame);
  model.numNodes();
  model.depth();
  model.toDebugString();
@Test public void linearRegressionWithSetters() { // Set params, train, and check as many params as we can. LinearRegression lr = new LinearRegression() .setMaxIter(10) .setRegParam(1.0).setSolver("l-bfgs"); LinearRegressionModel model = lr.fit(dataset); LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
// Train a linear regression (regularization left at defaults — tune via the
// commented-out setters) and print the training summary plus key model params.
df.show();
LinearRegression lr = new LinearRegression().setMaxIter(20); // .setRegParam(1).setElasticNetParam(1);
LinearRegressionModel model = lr.fit(df);
model.transform(df).show();
LinearRegressionTrainingSummary trainingSummary = model.summary();
System.out.println("numIterations: " + trainingSummary.totalIterations());
System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
trainingSummary.residuals().show();
System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
System.out.println("r2: " + trainingSummary.r2());
double intercept = model.intercept();
// FIX: label was misspelled as "Interesection" — the value printed is the model intercept.
System.out.println("Intercept: " + intercept);
double regParam = model.getRegParam();
// FIX: regParam is the regularization parameter, not a "Regression parameter".
System.out.println("Regularization parameter: " + regParam);
double tol = model.getTol();
System.out.println("Tol: " + tol);
double feature = 7.0; // primitive — the boxed Double here was needless autoboxing
Vector features = Vectors.dense(feature);
double p = model.predict(features);
/**
 * Encodes the fitted Spark LinearRegressionModel as a PMML regression model:
 * copies the schema features and model coefficients, simplifies the
 * regression table, and emits a regression with the model's intercept.
 */
@Override
public RegressionModel encodeModel(Schema schema) {
  LinearRegressionModel model = getTransformer();
  List<Feature> featureList = new ArrayList<>(schema.getFeatures());
  List<Double> coefficientList = new ArrayList<>(VectorUtil.toList(model.coefficients()));
  // Drop redundant terms before building the regression table.
  RegressionTableUtil.simplify(this, null, featureList, coefficientList);
  return RegressionModelUtil.createRegression(featureList, coefficientList, model.intercept(), null, schema);
}
}
/**
 * Encodes the fitted gradient-boosted-trees regressor as a PMML mining model:
 * each tree becomes a segment, combined by a weighted sum of the model's
 * per-tree weights.
 */
@Override
public MiningModel encodeModel(Schema schema) {
  GBTRegressionModel model = getTransformer();
  List<TreeModel> treeModels = TreeModelUtil.encodeDecisionTreeEnsemble(this, schema);
  Segmentation segmentation = MiningModelUtil.createSegmentation(
      Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, Doubles.asList(model.treeWeights()));
  return new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
      .setSegmentation(segmentation);
}
}
// NOTE(review): GBTRegressor test fragment (duplicate of an earlier snippet) — the
// lossType loop is not closed within this snippet; its remainder lies outside this view.
// Builds the regressor via the fluent API (setMaxDepth appears twice on purpose, per
// the inline comment), then for each supported loss type fits a model and smoke-calls
// its accessors. Reformatted onto multiple lines: collapsed onto one line, the "//"
// comment swallowed the for-loop that follows it.
Dataset<Row> dataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0);
GBTRegressor rf = new GBTRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setSubsamplingRate(1.0)
    .setSeed(1234)
    .setMaxIter(3)
    .setStepSize(0.1)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String lossType : GBTRegressor.supportedLossTypes()) {
  rf.setLossType(lossType);
  GBTRegressionModel model = rf.fit(dataFrame);
  model.transform(dataFrame);
  model.totalNumNodes();
  model.toDebugString();
  model.trees();
  model.treeWeights();
// NOTE(review): DecisionTreeRegressor test fragment (duplicate of an earlier snippet) —
// the impurity loop is not closed within this snippet; its remainder lies outside this
// view. Configures the regressor via the fluent API (duplicate setMaxDepth is
// deliberate per the inline comment), then fits and inspects a model for each
// supported impurity. Reformatted onto multiple lines: collapsed onto one line, the
// "//" comment swallowed the for-loop that follows it.
DecisionTreeRegressor dt = new DecisionTreeRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String impurity : DecisionTreeRegressor.supportedImpurities()) {
  dt.setImpurity(impurity);
  DecisionTreeRegressionModel model = dt.fit(dataFrame);
  model.transform(dataFrame);
  model.numNodes();
  model.depth();
  model.toDebugString();
@Test public void linearRegressionWithSetters() { // Set params, train, and check as many params as we can. LinearRegression lr = new LinearRegression() .setMaxIter(10) .setRegParam(1.0).setSolver("l-bfgs"); LinearRegressionModel model = lr.fit(dataset); LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
/**
 * Builds an exportable snapshot of a fitted regression tree: the adapted
 * root node, the input column names (features, then label) and the output
 * (prediction) column name.
 *
 * @param decisionTreeModel fitted Spark decision-tree regression model
 * @param df training DataFrame; unused in this body — presumably kept for
 *           interface compatibility (NOTE(review): confirm against callers)
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeRegressionModel decisionTreeModel, final DataFrame df) {
  final DecisionTreeModelInfo treeInfo = new DecisionTreeModelInfo();
  // Convert Spark's internal node representation into the exportable form.
  treeInfo.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));
  // LinkedHashSet keeps insertion order: features first, then label.
  final Set<String> inputs = new LinkedHashSet<String>();
  inputs.add(decisionTreeModel.getFeaturesCol());
  inputs.add(decisionTreeModel.getLabelCol());
  treeInfo.setInputKeys(inputs);
  final Set<String> outputs = new LinkedHashSet<String>();
  outputs.add(decisionTreeModel.getPredictionCol());
  treeInfo.setOutputKeys(outputs);
  return treeInfo;
}
// NOTE(review): GBTRegressor test fragment (duplicate of an earlier snippet) — the
// lossType loop is not closed within this snippet; its remainder lies outside this view.
// Builds the regressor via the fluent API (setMaxDepth appears twice on purpose, per
// the inline comment), then for each supported loss type fits a model and smoke-calls
// its accessors. Reformatted onto multiple lines: collapsed onto one line, the "//"
// comment swallowed the for-loop that follows it.
Dataset<Row> dataFrame = TreeTests.setMetadata(data, categoricalFeatures, 0);
GBTRegressor rf = new GBTRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setSubsamplingRate(1.0)
    .setSeed(1234)
    .setMaxIter(3)
    .setStepSize(0.1)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String lossType : GBTRegressor.supportedLossTypes()) {
  rf.setLossType(lossType);
  GBTRegressionModel model = rf.fit(dataFrame);
  model.transform(dataFrame);
  model.totalNumNodes();
  model.toDebugString();
  model.trees();
  model.treeWeights();
// NOTE(review): DecisionTreeRegressor test fragment (duplicate of an earlier snippet) —
// the impurity loop is not closed within this snippet; its remainder lies outside this
// view. Configures the regressor via the fluent API (duplicate setMaxDepth is
// deliberate per the inline comment), then fits and inspects a model for each
// supported impurity. Reformatted onto multiple lines: collapsed onto one line, the
// "//" comment swallowed the for-loop that follows it.
DecisionTreeRegressor dt = new DecisionTreeRegressor()
    .setMaxDepth(2)
    .setMaxBins(10)
    .setMinInstancesPerNode(5)
    .setMinInfoGain(0.0)
    .setMaxMemoryInMB(256)
    .setCacheNodeIds(false)
    .setCheckpointInterval(10)
    .setMaxDepth(2); // duplicate setMaxDepth to check builder pattern
for (String impurity : DecisionTreeRegressor.supportedImpurities()) {
  dt.setImpurity(impurity);
  DecisionTreeRegressionModel model = dt.fit(dataFrame);
  model.transform(dataFrame);
  model.numNodes();
  model.depth();
  model.toDebugString();
@Test public void linearRegressionWithSetters() { // Set params, train, and check as many params as we can. LinearRegression lr = new LinearRegression() .setMaxIter(10) .setRegParam(1.0).setSolver("l-bfgs"); LinearRegressionModel model = lr.fit(dataset); LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }