@Test public void logisticRegressionTrainingSummary() { LogisticRegression lr = new LogisticRegression(); LogisticRegressionModel model = lr.fit(dataset); LogisticRegressionTrainingSummary summary = model.summary(); Assert.assertEquals(summary.totalIterations(), summary.objectiveHistory().length); } }
@Test public void logisticRegressionTrainingSummary() { LogisticRegression lr = new LogisticRegression(); LogisticRegressionModel model = lr.fit(dataset); LogisticRegressionTrainingSummary summary = model.summary(); Assert.assertEquals(summary.totalIterations(), summary.objectiveHistory().length); } }
@Test public void logisticRegressionTrainingSummary() { LogisticRegression lr = new LogisticRegression(); LogisticRegressionModel model = lr.fit(dataset); LogisticRegressionTrainingSummary summary = model.summary(); Assert.assertEquals(summary.totalIterations(), summary.objectiveHistory().length); } }
LogisticRegressionModel lrModel = lr.fit(training);
@SuppressWarnings("unchecked") @Test public void logisticRegressionPredictorClassifierMethods() { LogisticRegression lr = new LogisticRegression(); LogisticRegressionModel model = lr.fit(dataset); Assert.assertEquals(2, model.numClasses()); model.transform(dataset).createOrReplaceTempView("transformed"); Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed"); for (Row row : trans1.collectAsList()) { Vector raw = (Vector) row.get(0); Vector prob = (Vector) row.get(1); Assert.assertEquals(raw.size(), 2); Assert.assertEquals(prob.size(), 2); double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1))); Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps); Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps); } Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed"); for (Row row : trans2.collectAsList()) { double pred = row.getDouble(0); Vector prob = (Vector) row.get(1); double probOfPred = prob.apply((int) pred); for (int i = 0; i < prob.size(); ++i) { Assert.assertTrue(probOfPred >= prob.apply(i)); } } }
@SuppressWarnings("unchecked") @Test public void logisticRegressionPredictorClassifierMethods() { LogisticRegression lr = new LogisticRegression(); LogisticRegressionModel model = lr.fit(dataset); Assert.assertEquals(2, model.numClasses()); model.transform(dataset).createOrReplaceTempView("transformed"); Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed"); for (Row row : trans1.collectAsList()) { Vector raw = (Vector) row.get(0); Vector prob = (Vector) row.get(1); Assert.assertEquals(raw.size(), 2); Assert.assertEquals(prob.size(), 2); double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1))); Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps); Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps); } Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed"); for (Row row : trans2.collectAsList()) { double pred = row.getDouble(0); Vector prob = (Vector) row.get(1); double probOfPred = prob.apply((int) pred); for (int i = 0; i < prob.size(); ++i) { Assert.assertTrue(probOfPred >= prob.apply(i)); } } }
@SuppressWarnings("unchecked") @Test public void logisticRegressionPredictorClassifierMethods() { LogisticRegression lr = new LogisticRegression(); LogisticRegressionModel model = lr.fit(dataset); Assert.assertEquals(2, model.numClasses()); model.transform(dataset).createOrReplaceTempView("transformed"); Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed"); for (Row row : trans1.collectAsList()) { Vector raw = (Vector) row.get(0); Vector prob = (Vector) row.get(1); Assert.assertEquals(raw.size(), 2); Assert.assertEquals(prob.size(), 2); double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1))); Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps); Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps); } Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed"); for (Row row : trans2.collectAsList()) { double pred = row.getDouble(0); Vector prob = (Vector) row.get(1); double probOfPred = prob.apply((int) pred); for (int i = 0; i < prob.size(); ++i) { Assert.assertTrue(probOfPred >= prob.apply(i)); } } }
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
.setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); LogisticRegression parent = (LogisticRegression) model.parent(); Assert.assertEquals(10, parent.getMaxIter()); LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); LogisticRegression parent2 = (LogisticRegression) model2.parent();
.setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); LogisticRegression parent = (LogisticRegression) model.parent(); Assert.assertEquals(10, parent.getMaxIter()); LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); LogisticRegression parent2 = (LogisticRegression) model2.parent();
.setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); LogisticRegression parent = (LogisticRegression) model.parent(); Assert.assertEquals(10, parent.getMaxIter()); LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); LogisticRegression parent2 = (LogisticRegression) model2.parent();