DataFrame results = model2.transform(test); for (Row r: results.select("features", "label", "myProbability", "prediction").collect()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2)
/**
 * Fits a {@link LogisticRegression} with default settings and checks that the model's
 * {@code rawPrediction}, {@code probability} and {@code prediction} output columns agree
 * with each other.
 *
 * <p>Uses {@code dataset}, {@code spark} and {@code eps}, which are declared outside this
 * method.
 */
@SuppressWarnings("unchecked")
@Test
public void logisticRegressionPredictorClassifierMethods() {
  LogisticRegression lr = new LogisticRegression();
  LogisticRegressionModel model = lr.fit(dataset);
  Assert.assertEquals(2, model.numClasses());

  model.transform(dataset).createOrReplaceTempView("transformed");

  // rawPrediction vs. probability: both vectors have two entries, and the class-1
  // probability is the logistic function of the class-1 raw score.
  Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed");
  for (Row row : trans1.collectAsList()) {
    Vector raw = (Vector) row.get(0);
    Vector prob = (Vector) row.get(1);
    // JUnit takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.assertEquals(2, raw.size());
    Assert.assertEquals(2, prob.size());
    double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1)));
    // Compared as |difference| against 0 so that a NaN on either side fails the check.
    Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps);
    Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps);
  }

  // prediction vs. probability: the predicted class has a probability at least as large
  // as that of every class.
  Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed");
  for (Row row : trans2.collectAsList()) {
    double pred = row.getDouble(0);
    Vector prob = (Vector) row.get(1);
    double probOfPred = prob.apply((int) pred);
    for (int i = 0; i < prob.size(); ++i) {
      Assert.assertTrue(probOfPred >= prob.apply(i));
    }
  }
}
/**
 * Fits a {@link LogisticRegression} with default settings and checks that the model's
 * {@code rawPrediction}, {@code probability} and {@code prediction} output columns agree
 * with each other.
 *
 * <p>Uses {@code dataset}, {@code spark} and {@code eps}, which are declared outside this
 * method.
 */
@SuppressWarnings("unchecked")
@Test
public void logisticRegressionPredictorClassifierMethods() {
  LogisticRegression lr = new LogisticRegression();
  LogisticRegressionModel model = lr.fit(dataset);
  Assert.assertEquals(2, model.numClasses());

  model.transform(dataset).createOrReplaceTempView("transformed");

  // rawPrediction vs. probability: both vectors have two entries, and the class-1
  // probability is the logistic function of the class-1 raw score.
  Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed");
  for (Row row : trans1.collectAsList()) {
    Vector raw = (Vector) row.get(0);
    Vector prob = (Vector) row.get(1);
    // JUnit takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.assertEquals(2, raw.size());
    Assert.assertEquals(2, prob.size());
    double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1)));
    // Compared as |difference| against 0 so that a NaN on either side fails the check.
    Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps);
    Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps);
  }

  // prediction vs. probability: the predicted class has a probability at least as large
  // as that of every class.
  Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed");
  for (Row row : trans2.collectAsList()) {
    double pred = row.getDouble(0);
    Vector prob = (Vector) row.get(1);
    double probOfPred = prob.apply((int) pred);
    for (int i = 0; i < prob.size(); ++i) {
      Assert.assertTrue(probOfPred >= prob.apply(i));
    }
  }
}
/**
 * Fits a {@link LogisticRegression} with default settings and checks that the model's
 * {@code rawPrediction}, {@code probability} and {@code prediction} output columns agree
 * with each other.
 *
 * <p>Uses {@code dataset}, {@code spark} and {@code eps}, which are declared outside this
 * method.
 */
@SuppressWarnings("unchecked")
@Test
public void logisticRegressionPredictorClassifierMethods() {
  LogisticRegression lr = new LogisticRegression();
  LogisticRegressionModel model = lr.fit(dataset);
  Assert.assertEquals(2, model.numClasses());

  model.transform(dataset).createOrReplaceTempView("transformed");

  // rawPrediction vs. probability: both vectors have two entries, and the class-1
  // probability is the logistic function of the class-1 raw score.
  Dataset<Row> trans1 = spark.sql("SELECT rawPrediction, probability FROM transformed");
  for (Row row : trans1.collectAsList()) {
    Vector raw = (Vector) row.get(0);
    Vector prob = (Vector) row.get(1);
    // JUnit takes (expected, actual); keeping that order makes failure messages accurate.
    Assert.assertEquals(2, raw.size());
    Assert.assertEquals(2, prob.size());
    double probFromRaw1 = 1.0 / (1.0 + Math.exp(-raw.apply(1)));
    // Compared as |difference| against 0 so that a NaN on either side fails the check.
    Assert.assertEquals(0, Math.abs(prob.apply(1) - probFromRaw1), eps);
    Assert.assertEquals(0, Math.abs(prob.apply(0) - (1.0 - probFromRaw1)), eps);
  }

  // prediction vs. probability: the predicted class has a probability at least as large
  // as that of every class.
  Dataset<Row> trans2 = spark.sql("SELECT prediction, probability FROM transformed");
  for (Row row : trans2.collectAsList()) {
    double pred = row.getDouble(0);
    Vector prob = (Vector) row.get(1);
    double probOfPred = prob.apply((int) pred);
    for (int i = 0; i < prob.size(); ++i) {
      Assert.assertTrue(probOfPred >= prob.apply(i));
    }
  }
}
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
@Test public void logisticRegressionDefaultParams() { LogisticRegression lr = new LogisticRegression(); Assert.assertEquals(lr.getLabelCol(), "label"); LogisticRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction"); predictions.collectAsList(); // Check defaults Assert.assertEquals(0.5, model.getThreshold(), eps); Assert.assertEquals("features", model.getFeaturesCol()); Assert.assertEquals("prediction", model.getPredictionCol()); Assert.assertEquals("probability", model.getProbabilityCol()); }
model.transform(dataset).createOrReplaceTempView("predAllZero"); Dataset<Row> predAllZero = spark.sql("SELECT prediction, myProbability FROM predAllZero"); for (Row r : predAllZero.collectAsList()) { model.transform(dataset, model.threshold().w(0.0), model.probabilityCol().w("myProb")) .createOrReplaceTempView("predNotAllZero"); Dataset<Row> predNotAllZero = spark.sql("SELECT prediction, myProb FROM predNotAllZero");
model.transform(dataset).createOrReplaceTempView("predAllZero"); Dataset<Row> predAllZero = spark.sql("SELECT prediction, myProbability FROM predAllZero"); for (Row r : predAllZero.collectAsList()) { model.transform(dataset, model.threshold().w(0.0), model.probabilityCol().w("myProb")) .createOrReplaceTempView("predNotAllZero"); Dataset<Row> predNotAllZero = spark.sql("SELECT prediction, myProb FROM predNotAllZero");
model.transform(dataset).createOrReplaceTempView("predAllZero"); Dataset<Row> predAllZero = spark.sql("SELECT prediction, myProbability FROM predAllZero"); for (Row r : predAllZero.collectAsList()) { model.transform(dataset, model.threshold().w(0.0), model.probabilityCol().w("myProb")) .createOrReplaceTempView("predNotAllZero"); Dataset<Row> predNotAllZero = spark.sql("SELECT prediction, myProb FROM predNotAllZero");