/**
 * Fits a default-configured logistic regression on {@code dataset} and checks that the
 * training summary's total iteration count equals the length of its objective history.
 */
@Test
public void logisticRegressionTrainingSummary() {
  LogisticRegression estimator = new LogisticRegression();
  LogisticRegressionModel fitted = estimator.fit(dataset);
  LogisticRegressionTrainingSummary trainingSummary = fitted.summary();

  // Both values come from the same summary object; they are compared against each other.
  Assert.assertEquals(
    trainingSummary.totalIterations(),
    trainingSummary.objectiveHistory().length);
}
} // closes the enclosing scope opened before this span
/**
 * Fits a default-configured logistic regression on {@code dataset} and checks that the
 * training summary's total iteration count equals the length of its objective history.
 */
@Test
public void logisticRegressionTrainingSummary() {
  LogisticRegression estimator = new LogisticRegression();
  LogisticRegressionModel fitted = estimator.fit(dataset);
  LogisticRegressionTrainingSummary trainingSummary = fitted.summary();

  // Both values come from the same summary object; they are compared against each other.
  Assert.assertEquals(
    trainingSummary.totalIterations(),
    trainingSummary.objectiveHistory().length);
}
} // closes the enclosing scope opened before this span
/**
 * Fits a default-configured logistic regression on {@code dataset} and checks that the
 * training summary's total iteration count equals the length of its objective history.
 */
@Test
public void logisticRegressionTrainingSummary() {
  LogisticRegression estimator = new LogisticRegression();
  LogisticRegressionModel fitted = estimator.fit(dataset);
  LogisticRegressionTrainingSummary trainingSummary = fitted.summary();

  // Both values come from the same summary object; they are compared against each other.
  Assert.assertEquals(
    trainingSummary.totalIterations(),
    trainingSummary.objectiveHistory().length);
}
} // closes the enclosing scope opened before this span
@Override public void execute() { LogisticRegression lr = new LogisticRegression() .setMaxIter(numIterations) .setRegParam(regularization)
/**
 * Smoke test for a two-stage pipeline: a {@code StandardScaler} writing "scaledFeatures",
 * followed by a logistic regression reading that column. The pipeline is fitted on
 * {@code dataset}, the transformed output is registered as the temp view "prediction", and
 * the result is collected through SQL. There are no assertions; the test passes if nothing
 * throws.
 */
@Test
public void pipeline() {
  StandardScaler featureScaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures");
  LogisticRegression classifier = new LogisticRegression()
    .setFeaturesCol("scaledFeatures");

  PipelineStage[] stages = new PipelineStage[]{featureScaler, classifier};
  Pipeline twoStagePipeline = new Pipeline().setStages(stages);
  PipelineModel fitted = twoStagePipeline.fit(dataset);

  fitted.transform(dataset).createOrReplaceTempView("prediction");
  // Collecting forces the query to actually execute.
  spark.sql("SELECT label, probability, prediction FROM prediction").collectAsList();
}
} // closes the enclosing scope opened before this span
/**
 * Smoke test for a two-stage pipeline: a {@code StandardScaler} writing "scaledFeatures",
 * followed by a logistic regression reading that column. The pipeline is fitted on
 * {@code dataset}, the transformed output is registered as the temp view "prediction", and
 * the result is collected through SQL. There are no assertions; the test passes if nothing
 * throws.
 */
@Test
public void pipeline() {
  StandardScaler featureScaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures");
  LogisticRegression classifier = new LogisticRegression()
    .setFeaturesCol("scaledFeatures");

  PipelineStage[] stages = new PipelineStage[]{featureScaler, classifier};
  Pipeline twoStagePipeline = new Pipeline().setStages(stages);
  PipelineModel fitted = twoStagePipeline.fit(dataset);

  fitted.transform(dataset).createOrReplaceTempView("prediction");
  // Collecting forces the query to actually execute.
  spark.sql("SELECT label, probability, prediction FROM prediction").collectAsList();
}
} // closes the enclosing scope opened before this span
/**
 * Smoke test for a two-stage pipeline: a {@code StandardScaler} writing "scaledFeatures",
 * followed by a logistic regression reading that column. The pipeline is fitted on
 * {@code dataset}, the transformed output is registered as the temp view "prediction", and
 * the result is collected through SQL. There are no assertions; the test passes if nothing
 * throws.
 */
@Test
public void pipeline() {
  StandardScaler featureScaler = new StandardScaler()
    .setInputCol("features")
    .setOutputCol("scaledFeatures");
  LogisticRegression classifier = new LogisticRegression()
    .setFeaturesCol("scaledFeatures");

  PipelineStage[] stages = new PipelineStage[]{featureScaler, classifier};
  Pipeline twoStagePipeline = new Pipeline().setStages(stages);
  PipelineModel fitted = twoStagePipeline.fit(dataset);

  fitted.transform(dataset).createOrReplaceTempView("prediction");
  // Collecting forces the query to actually execute.
  spark.sql("SELECT label, probability, prediction FROM prediction").collectAsList();
}
} // closes the enclosing scope opened before this span
/**
 * Checks the default column names of {@code OneVsRest} and of the model it produces.
 *
 * <p>A {@code OneVsRest} wrapping a default {@code LogisticRegression} must report "label" and
 * "prediction" as its label and prediction columns, both before fitting and on the fitted
 * {@code OneVsRestModel}. The model's output is also selected and collected to make sure the
 * transform actually runs.
 */
@Test
public void oneVsRestDefaultParams() {
  OneVsRest ova = new OneVsRest();
  ova.setClassifier(new LogisticRegression());
  // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
  // messages read correctly.
  Assert.assertEquals("label", ova.getLabelCol());
  Assert.assertEquals("prediction", ova.getPredictionCol());

  OneVsRestModel ovaModel = ova.fit(dataset);
  Dataset<Row> predictions = ovaModel.transform(dataset).select("label", "prediction");
  // Collecting forces the transform to execute.
  predictions.collectAsList();

  Assert.assertEquals("label", ovaModel.getLabelCol());
  Assert.assertEquals("prediction", ovaModel.getPredictionCol());
}
} // closes the enclosing scope opened before this span
/**
 * Checks that the outputs of a fitted logistic regression model are mutually consistent.
 *
 * <p>After fitting on {@code dataset} the test verifies that:
 * <ul>
 *   <li>the model reports two classes;</li>
 *   <li>every {@code rawPrediction} and {@code probability} vector has two entries;</li>
 *   <li>the probability of class 1 equals the logistic function of the class-1 raw score, and
 *       the probability of class 0 is its complement (both within {@code eps});</li>
 *   <li>the predicted class has a probability at least as large as every other class.</li>
 * </ul>
 */
@SuppressWarnings("unchecked")
@Test
public void logisticRegressionPredictorClassifierMethods() {
  LogisticRegression lr = new LogisticRegression();
  LogisticRegressionModel model = lr.fit(dataset);
  Assert.assertEquals(2, model.numClasses());

  model.transform(dataset).createOrReplaceTempView("transformed");

  Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed");
  for (Row row : trans1.collectAsList()) {
    Vector raw = (Vector) row.get(0);
    Vector prob = (Vector) row.get(1);
    // Expected value first, per JUnit's assertEquals(expected, actual) contract.
    Assert.assertEquals(2, raw.size());
    Assert.assertEquals(2, prob.size());
    // Logistic (sigmoid) function applied to the raw score of class 1.
    double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1)));
    Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps);
    Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps);
  }

  Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed");
  for (Row row : trans2.collectAsList()) {
    double pred = row.getDouble(0);
    Vector prob = (Vector) row.get(1);
    // The prediction is stored as a double; it is used here as a class index.
    double probOfPred = prob.apply((int) pred);
    for (int i = 0; i < prob.size(); ++i) {
      Assert.assertTrue(probOfPred >= prob.apply(i));
    }
  }
}
/**
 * Checks the default column names of {@code OneVsRest} and of the model it produces.
 *
 * <p>A {@code OneVsRest} wrapping a default {@code LogisticRegression} must report "label" and
 * "prediction" as its label and prediction columns, both before fitting and on the fitted
 * {@code OneVsRestModel}. The model's output is also selected and collected to make sure the
 * transform actually runs.
 */
@Test
public void oneVsRestDefaultParams() {
  OneVsRest ova = new OneVsRest();
  ova.setClassifier(new LogisticRegression());
  // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
  // messages read correctly.
  Assert.assertEquals("label", ova.getLabelCol());
  Assert.assertEquals("prediction", ova.getPredictionCol());

  OneVsRestModel ovaModel = ova.fit(dataset);
  Dataset<Row> predictions = ovaModel.transform(dataset).select("label", "prediction");
  // Collecting forces the transform to execute.
  predictions.collectAsList();

  Assert.assertEquals("label", ovaModel.getLabelCol());
  Assert.assertEquals("prediction", ovaModel.getPredictionCol());
}
} // closes the enclosing scope opened before this span
/**
 * Checks the default column names of {@code OneVsRest} and of the model it produces.
 *
 * <p>A {@code OneVsRest} wrapping a default {@code LogisticRegression} must report "label" and
 * "prediction" as its label and prediction columns, both before fitting and on the fitted
 * {@code OneVsRestModel}. The model's output is also selected and collected to make sure the
 * transform actually runs.
 */
@Test
public void oneVsRestDefaultParams() {
  OneVsRest ova = new OneVsRest();
  ova.setClassifier(new LogisticRegression());
  // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
  // messages read correctly.
  Assert.assertEquals("label", ova.getLabelCol());
  Assert.assertEquals("prediction", ova.getPredictionCol());

  OneVsRestModel ovaModel = ova.fit(dataset);
  Dataset<Row> predictions = ovaModel.transform(dataset).select("label", "prediction");
  // Collecting forces the transform to execute.
  predictions.collectAsList();

  Assert.assertEquals("label", ovaModel.getLabelCol());
  Assert.assertEquals("prediction", ovaModel.getPredictionCol());
}
} // closes the enclosing scope opened before this span
/**
 * Checks that the outputs of a fitted logistic regression model are mutually consistent.
 *
 * <p>After fitting on {@code dataset} the test verifies that:
 * <ul>
 *   <li>the model reports two classes;</li>
 *   <li>every {@code rawPrediction} and {@code probability} vector has two entries;</li>
 *   <li>the probability of class 1 equals the logistic function of the class-1 raw score, and
 *       the probability of class 0 is its complement (both within {@code eps});</li>
 *   <li>the predicted class has a probability at least as large as every other class.</li>
 * </ul>
 */
@SuppressWarnings("unchecked")
@Test
public void logisticRegressionPredictorClassifierMethods() {
  LogisticRegression lr = new LogisticRegression();
  LogisticRegressionModel model = lr.fit(dataset);
  Assert.assertEquals(2, model.numClasses());

  model.transform(dataset).createOrReplaceTempView("transformed");

  Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed");
  for (Row row : trans1.collectAsList()) {
    Vector raw = (Vector) row.get(0);
    Vector prob = (Vector) row.get(1);
    // Expected value first, per JUnit's assertEquals(expected, actual) contract.
    Assert.assertEquals(2, raw.size());
    Assert.assertEquals(2, prob.size());
    // Logistic (sigmoid) function applied to the raw score of class 1.
    double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1)));
    Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps);
    Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps);
  }

  Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed");
  for (Row row : trans2.collectAsList()) {
    double pred = row.getDouble(0);
    Vector prob = (Vector) row.get(1);
    // The prediction is stored as a double; it is used here as a class index.
    double probOfPred = prob.apply((int) pred);
    for (int i = 0; i < prob.size(); ++i) {
      Assert.assertTrue(probOfPred >= prob.apply(i));
    }
  }
}
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
/**
 * Checks that the outputs of a fitted logistic regression model are mutually consistent.
 *
 * <p>After fitting on {@code dataset} the test verifies that:
 * <ul>
 *   <li>the model reports two classes;</li>
 *   <li>every {@code rawPrediction} and {@code probability} vector has two entries;</li>
 *   <li>the probability of class 1 equals the logistic function of the class-1 raw score, and
 *       the probability of class 0 is its complement (both within {@code eps});</li>
 *   <li>the predicted class has a probability at least as large as every other class.</li>
 * </ul>
 */
@SuppressWarnings("unchecked")
@Test
public void logisticRegressionPredictorClassifierMethods() {
  LogisticRegression lr = new LogisticRegression();
  LogisticRegressionModel model = lr.fit(dataset);
  Assert.assertEquals(2, model.numClasses());

  model.transform(dataset).createOrReplaceTempView("transformed");

  Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed");
  for (Row row : trans1.collectAsList()) {
    Vector raw = (Vector) row.get(0);
    Vector prob = (Vector) row.get(1);
    // Expected value first, per JUnit's assertEquals(expected, actual) contract.
    Assert.assertEquals(2, raw.size());
    Assert.assertEquals(2, prob.size());
    // Logistic (sigmoid) function applied to the raw score of class 1.
    double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1)));
    Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps);
    Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps);
  }

  Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed");
  for (Row row : trans2.collectAsList()) {
    double pred = row.getDouble(0);
    Vector prob = (Vector) row.get(1);
    // The prediction is stored as a double; it is used here as a class index.
    double probOfPred = prob.apply((int) pred);
    for (int i = 0; i < prob.size(); ++i) {
      Assert.assertTrue(probOfPred >= prob.apply(i));
    }
  }
}
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
/**
 * Runs 3-fold cross-validation over a 2x2 grid of logistic regression parameters
 * (regParam in {0.001, 1000.0}, maxIter in {0, 10}) scored by a
 * {@code BinaryClassificationEvaluator}, and checks that the estimator behind the best model
 * uses regParam 0.001 and maxIter 10.
 */
@Test
public void crossValidationWithLogisticRegression() {
  LogisticRegression estimator = new LogisticRegression();

  ParamGridBuilder gridBuilder = new ParamGridBuilder()
    .addGrid(estimator.regParam(), new double[]{0.001, 1000.0})
    .addGrid(estimator.maxIter(), new int[]{0, 10});
  ParamMap[] candidateParams = gridBuilder.build();

  BinaryClassificationEvaluator evaluator = new BinaryClassificationEvaluator();
  CrossValidator validator = new CrossValidator()
    .setEstimator(estimator)
    .setEstimatorParamMaps(candidateParams)
    .setEvaluator(evaluator)
    .setNumFolds(3);
  CrossValidatorModel selected = validator.fit(dataset);

  // The best model's parent is the estimator configuration that produced it.
  LogisticRegression winner = (LogisticRegression) selected.bestModel().parent();
  Assert.assertEquals(0.001, winner.getRegParam(), 0.0);
  Assert.assertEquals(10, winner.getMaxIter());
}
} // closes the enclosing scope opened before this span
/**
 * Runs 3-fold cross-validation over a 2x2 grid of logistic regression parameters
 * (regParam in {0.001, 1000.0}, maxIter in {0, 10}) scored by a
 * {@code BinaryClassificationEvaluator}, and checks that the estimator behind the best model
 * uses regParam 0.001 and maxIter 10.
 */
@Test
public void crossValidationWithLogisticRegression() {
  LogisticRegression estimator = new LogisticRegression();

  ParamGridBuilder gridBuilder = new ParamGridBuilder()
    .addGrid(estimator.regParam(), new double[]{0.001, 1000.0})
    .addGrid(estimator.maxIter(), new int[]{0, 10});
  ParamMap[] candidateParams = gridBuilder.build();

  BinaryClassificationEvaluator evaluator = new BinaryClassificationEvaluator();
  CrossValidator validator = new CrossValidator()
    .setEstimator(estimator)
    .setEstimatorParamMaps(candidateParams)
    .setEvaluator(evaluator)
    .setNumFolds(3);
  CrossValidatorModel selected = validator.fit(dataset);

  // The best model's parent is the estimator configuration that produced it.
  LogisticRegression winner = (LogisticRegression) selected.bestModel().parent();
  Assert.assertEquals(0.001, winner.getRegParam(), 0.0);
  Assert.assertEquals(10, winner.getMaxIter());
}
} // closes the enclosing scope opened before this span
/**
 * Runs 3-fold cross-validation over a 2x2 grid of logistic regression parameters
 * (regParam in {0.001, 1000.0}, maxIter in {0, 10}) scored by a
 * {@code BinaryClassificationEvaluator}, and checks that the estimator behind the best model
 * uses regParam 0.001 and maxIter 10.
 */
@Test
public void crossValidationWithLogisticRegression() {
  LogisticRegression estimator = new LogisticRegression();

  ParamGridBuilder gridBuilder = new ParamGridBuilder()
    .addGrid(estimator.regParam(), new double[]{0.001, 1000.0})
    .addGrid(estimator.maxIter(), new int[]{0, 10});
  ParamMap[] candidateParams = gridBuilder.build();

  BinaryClassificationEvaluator evaluator = new BinaryClassificationEvaluator();
  CrossValidator validator = new CrossValidator()
    .setEstimator(estimator)
    .setEstimatorParamMaps(candidateParams)
    .setEvaluator(evaluator)
    .setNumFolds(3);
  CrossValidatorModel selected = validator.fit(dataset);

  // The best model's parent is the estimator configuration that produced it.
  LogisticRegression winner = (LogisticRegression) selected.bestModel().parent();
  Assert.assertEquals(0.001, winner.getRegParam(), 0.0);
  Assert.assertEquals(10, winner.getMaxIter());
}
} // closes the enclosing scope opened before this span
@Test public void logisticRegressionWithSetters() { LogisticRegression lr = new LogisticRegression() .setMaxIter(10) .setRegParam(1.0)