@Test public void logisticRegressionWithSetters() { LogisticRegression lr = new LogisticRegression() .setMaxIter(10) .setRegParam(1.0) .setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); LogisticRegression parent = (LogisticRegression) model.parent(); Assert.assertEquals(10, parent.getMaxIter()); Assert.assertEquals(1.0, parent.getRegParam(), eps); Assert.assertEquals(0.4, parent.getThresholds()[0], eps); Assert.assertEquals(0.6, parent.getThresholds()[1], eps); Assert.assertEquals(0.6, parent.getThreshold(), eps); Assert.assertEquals(0.6, model.getThreshold(), eps); LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); LogisticRegression parent2 = (LogisticRegression) model2.parent(); Assert.assertEquals(5, parent2.getMaxIter()); Assert.assertEquals(0.1, parent2.getRegParam(), eps); Assert.assertEquals(0.4, parent2.getThreshold(), eps); Assert.assertEquals(0.4, model2.getThreshold(), eps); Assert.assertEquals("theProb", model2.getProbabilityCol());
LogisticRegression lr = new LogisticRegression(); System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n"); lr.setMaxIter(10) .setRegParam(0.01); LogisticRegressionModel model1 = lr.fit(training); .put(lr.maxIter().w(20)) // Specify 1 Param. .put(lr.maxIter(), 30) // This overwrites the original maxIter. .put(lr.regParam().w(0.1), lr.threshold().w(0.55)); // Specify multiple Params. .put(lr.probabilityCol().w("myProbability")); // Change output column name ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2); LogisticRegressionModel model2 = lr.fit(training, paramMapCombined); System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());
/**
 * Fits a LogisticRegression with no parameters overridden and compares the
 * iteration count reported by the training summary against the length of
 * its objective history.
 */
@Test
public void logisticRegressionTrainingSummary() {
  LogisticRegressionModel fitted = new LogisticRegression().fit(dataset);
  LogisticRegressionTrainingSummary trainingSummary = fitted.summary();
  Assert.assertEquals(
    trainingSummary.totalIterations(),
    trainingSummary.objectiveHistory().length);
}
}
@Override public void execute() { LogisticRegression lr = new LogisticRegression() .setMaxIter(numIterations) .setRegParam(regularization) .setElasticNetParam(elasticNet); LogisticRegressionModel lrModel = lr.fit(training);
/**
 * Runs 3-fold cross validation over a 2x2 grid of regParam and maxIter and
 * asserts that the winning model's parent estimator carries
 * regParam = 0.001 and maxIter = 10.
 */
@Test
public void crossValidationWithLogisticRegression() {
  LogisticRegression estimator = new LogisticRegression();

  // Candidate values for each tuned parameter.
  double[] regParamCandidates = {0.001, 1000.0};
  int[] maxIterCandidates = {0, 10};
  ParamMap[] grid = new ParamGridBuilder()
    .addGrid(estimator.regParam(), regParamCandidates)
    .addGrid(estimator.maxIter(), maxIterCandidates)
    .build();

  BinaryClassificationEvaluator evaluator = new BinaryClassificationEvaluator();
  CrossValidator validator = new CrossValidator()
    .setEstimator(estimator)
    .setEstimatorParamMaps(grid)
    .setEvaluator(evaluator)
    .setNumFolds(3);

  CrossValidatorModel fitted = validator.fit(dataset);
  LogisticRegression best = (LogisticRegression) fitted.bestModel().parent();
  Assert.assertEquals(0.001, best.getRegParam(), 0.0);
  Assert.assertEquals(10, best.getMaxIter());
}
}
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
HashingTF hashingTF = new HashingTF().setNumFeatures(numFeatures) .setInputCol(tokenizer.getOutputCol()).setOutputCol("features"); LogisticRegression lr = new LogisticRegression().setMaxIter(100) .setRegParam(0.01); Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] { tokenizer, hashingTF, lr });
/**
 * Builds a two-stage pipeline (StandardScaler feeding LogisticRegression via
 * the "scaledFeatures" column), fits it, and queries the transformed output
 * through a temp view named "prediction".
 */
@Test
public void pipeline() {
  StandardScaler featureScaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures");
  LogisticRegression classifier = new LogisticRegression()
    .setFeaturesCol("scaledFeatures");

  PipelineStage[] stages = {featureScaler, classifier};
  Pipeline scalerThenClassifier = new Pipeline().setStages(stages);
  PipelineModel fitted = scalerThenClassifier.fit(dataset);

  fitted.transform(dataset).createOrReplaceTempView("prediction");
  // The collected rows are discarded; this only exercises the query.
  spark.sql("SELECT label, probability, prediction FROM prediction").collectAsList();
}
}
/**
 * Wraps a default LogisticRegression in OneVsRest and checks that the label
 * and prediction column names are "label" and "prediction" on both the
 * estimator and the fitted model.
 */
@Test
public void oneVsRestDefaultParams() {
  OneVsRest ova = new OneVsRest();
  ova.setClassifier(new LogisticRegression());
  // JUnit's signature is assertEquals(expected, actual); the literal goes first
  // so a failure message reports the right value as "expected".
  Assert.assertEquals("label", ova.getLabelCol());
  Assert.assertEquals("prediction", ova.getPredictionCol());
  OneVsRestModel ovaModel = ova.fit(dataset);
  Dataset<Row> predictions = ovaModel.transform(dataset).select("label", "prediction");
  // The collected rows are discarded; this only exercises the transform.
  predictions.collectAsList();
  Assert.assertEquals("label", ovaModel.getLabelCol());
  Assert.assertEquals("prediction", ovaModel.getPredictionCol());
}
}
/**
 * Runs 3-fold cross validation over a 2x2 grid of regParam and maxIter and
 * asserts that the winning model's parent estimator carries
 * regParam = 0.001 and maxIter = 10.
 */
@Test
public void crossValidationWithLogisticRegression() {
  LogisticRegression estimator = new LogisticRegression();

  // Candidate values for each tuned parameter.
  double[] regParamCandidates = {0.001, 1000.0};
  int[] maxIterCandidates = {0, 10};
  ParamMap[] grid = new ParamGridBuilder()
    .addGrid(estimator.regParam(), regParamCandidates)
    .addGrid(estimator.maxIter(), maxIterCandidates)
    .build();

  BinaryClassificationEvaluator evaluator = new BinaryClassificationEvaluator();
  CrossValidator validator = new CrossValidator()
    .setEstimator(estimator)
    .setEstimatorParamMaps(grid)
    .setEvaluator(evaluator)
    .setNumFolds(3);

  CrossValidatorModel fitted = validator.fit(dataset);
  LogisticRegression best = (LogisticRegression) fitted.bestModel().parent();
  Assert.assertEquals(0.001, best.getRegParam(), 0.0);
  Assert.assertEquals(10, best.getMaxIter());
}
}
/**
 * Fits a LogisticRegression with no parameters overridden and compares the
 * iteration count reported by the training summary against the length of
 * its objective history.
 */
@Test
public void logisticRegressionTrainingSummary() {
  LogisticRegressionModel fitted = new LogisticRegression().fit(dataset);
  LogisticRegressionTrainingSummary trainingSummary = fitted.summary();
  Assert.assertEquals(
    trainingSummary.totalIterations(),
    trainingSummary.objectiveHistory().length);
}
}
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
/**
 * Builds a two-stage pipeline (StandardScaler feeding LogisticRegression via
 * the "scaledFeatures" column), fits it, and queries the transformed output
 * through a temp view named "prediction".
 */
@Test
public void pipeline() {
  StandardScaler featureScaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures");
  LogisticRegression classifier = new LogisticRegression()
    .setFeaturesCol("scaledFeatures");

  PipelineStage[] stages = {featureScaler, classifier};
  Pipeline scalerThenClassifier = new Pipeline().setStages(stages);
  PipelineModel fitted = scalerThenClassifier.fit(dataset);

  fitted.transform(dataset).createOrReplaceTempView("prediction");
  // The collected rows are discarded; this only exercises the query.
  spark.sql("SELECT label, probability, prediction FROM prediction").collectAsList();
}
}
/**
 * Wraps a default LogisticRegression in OneVsRest and checks that the label
 * and prediction column names are "label" and "prediction" on both the
 * estimator and the fitted model.
 */
@Test
public void oneVsRestDefaultParams() {
  OneVsRest ova = new OneVsRest();
  ova.setClassifier(new LogisticRegression());
  // JUnit's signature is assertEquals(expected, actual); the literal goes first
  // so a failure message reports the right value as "expected".
  Assert.assertEquals("label", ova.getLabelCol());
  Assert.assertEquals("prediction", ova.getPredictionCol());
  OneVsRestModel ovaModel = ova.fit(dataset);
  Dataset<Row> predictions = ovaModel.transform(dataset).select("label", "prediction");
  // The collected rows are discarded; this only exercises the transform.
  predictions.collectAsList();
  Assert.assertEquals("label", ovaModel.getLabelCol());
  Assert.assertEquals("prediction", ovaModel.getPredictionCol());
}
}
@Test public void logisticRegressionWithSetters() { LogisticRegression lr = new LogisticRegression() .setMaxIter(10) .setRegParam(1.0) .setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); LogisticRegression parent = (LogisticRegression) model.parent(); Assert.assertEquals(10, parent.getMaxIter()); Assert.assertEquals(1.0, parent.getRegParam(), eps); Assert.assertEquals(0.4, parent.getThresholds()[0], eps); Assert.assertEquals(0.6, parent.getThresholds()[1], eps); Assert.assertEquals(0.6, parent.getThreshold(), eps); Assert.assertEquals(0.6, model.getThreshold(), eps); LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); LogisticRegression parent2 = (LogisticRegression) model2.parent(); Assert.assertEquals(5, parent2.getMaxIter()); Assert.assertEquals(0.1, parent2.getRegParam(), eps); Assert.assertEquals(0.4, parent2.getThreshold(), eps); Assert.assertEquals(0.4, model2.getThreshold(), eps); Assert.assertEquals("theProb", model2.getProbabilityCol());
/**
 * Runs 3-fold cross validation over a 2x2 grid of regParam and maxIter and
 * asserts that the winning model's parent estimator carries
 * regParam = 0.001 and maxIter = 10.
 */
@Test
public void crossValidationWithLogisticRegression() {
  LogisticRegression estimator = new LogisticRegression();

  // Candidate values for each tuned parameter.
  double[] regParamCandidates = {0.001, 1000.0};
  int[] maxIterCandidates = {0, 10};
  ParamMap[] grid = new ParamGridBuilder()
    .addGrid(estimator.regParam(), regParamCandidates)
    .addGrid(estimator.maxIter(), maxIterCandidates)
    .build();

  BinaryClassificationEvaluator evaluator = new BinaryClassificationEvaluator();
  CrossValidator validator = new CrossValidator()
    .setEstimator(estimator)
    .setEstimatorParamMaps(grid)
    .setEvaluator(evaluator)
    .setNumFolds(3);

  CrossValidatorModel fitted = validator.fit(dataset);
  LogisticRegression best = (LogisticRegression) fitted.bestModel().parent();
  Assert.assertEquals(0.001, best.getRegParam(), 0.0);
  Assert.assertEquals(10, best.getMaxIter());
}
}
/**
 * Fits a LogisticRegression with no parameters overridden and compares the
 * iteration count reported by the training summary against the length of
 * its objective history.
 */
@Test
public void logisticRegressionTrainingSummary() {
  LogisticRegressionModel fitted = new LogisticRegression().fit(dataset);
  LogisticRegressionTrainingSummary trainingSummary = fitted.summary();
  Assert.assertEquals(
    trainingSummary.totalIterations(),
    trainingSummary.objectiveHistory().length);
}
}
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
/**
 * Builds a two-stage pipeline (StandardScaler feeding LogisticRegression via
 * the "scaledFeatures" column), fits it, and queries the transformed output
 * through a temp view named "prediction".
 */
@Test
public void pipeline() {
  StandardScaler featureScaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures");
  LogisticRegression classifier = new LogisticRegression()
    .setFeaturesCol("scaledFeatures");

  PipelineStage[] stages = {featureScaler, classifier};
  Pipeline scalerThenClassifier = new Pipeline().setStages(stages);
  PipelineModel fitted = scalerThenClassifier.fit(dataset);

  fitted.transform(dataset).createOrReplaceTempView("prediction");
  // The collected rows are discarded; this only exercises the query.
  spark.sql("SELECT label, probability, prediction FROM prediction").collectAsList();
}
}
/**
 * Wraps a default LogisticRegression in OneVsRest and checks that the label
 * and prediction column names are "label" and "prediction" on both the
 * estimator and the fitted model.
 */
@Test
public void oneVsRestDefaultParams() {
  OneVsRest ova = new OneVsRest();
  ova.setClassifier(new LogisticRegression());
  // JUnit's signature is assertEquals(expected, actual); the literal goes first
  // so a failure message reports the right value as "expected".
  Assert.assertEquals("label", ova.getLabelCol());
  Assert.assertEquals("prediction", ova.getPredictionCol());
  OneVsRestModel ovaModel = ova.fit(dataset);
  Dataset<Row> predictions = ovaModel.transform(dataset).select("label", "prediction");
  // The collected rows are discarded; this only exercises the transform.
  predictions.collectAsList();
  Assert.assertEquals("label", ovaModel.getLabelCol());
  Assert.assertEquals("prediction", ovaModel.getPredictionCol());
}
}