@Override public DDF transformScaleMinMax() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()).append(" "); } else { // subtract min, divide by (max - min) sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].min(), (summaryArr[i].max() - summaryArr[i].min()), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
@Override public DDF transformScaleStandard() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()); } else { // subtract mean, divide by stdev sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].mean(), summaryArr[i].stdev(), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
/**
 * Checks the summary length and row count of the shared {@code ddf} fixture, then loads the
 * {@code smiths2} table into a second DDF and checks the NA count reported for its column at
 * index 2.
 *
 * @throws DDFException propagated from the DDF and manager calls
 */
@Test
public void testSummary() throws DDFException {
  Assert.assertEquals(14, ddf.getSummary().length);
  Assert.assertEquals(31, ddf.getNumRows());

  createTableSmiths2();
  DDF ddf3 = manager.sql2ddf("select * from smiths2", false);
  Summary[] summary = ddf3.getSummary();
  // Expected value first, actual second, so a failure message reports them the right way round.
  Assert.assertEquals(4, summary[2].NACount());
}
// Excerpt from a test whose enclosing method is outside this view.
// Checks that the DDF summary has 3 entries, trains a "logisticRegressionWithSGD" model
// through ddf.ML.train, and requests that model's confusion matrix.
// NOTE(review): 10 and 0.1 look like an iteration count and a step size, and 0.5 like a
// classification threshold — confirm against the ML handler's signatures.
Assert.assertEquals(3, ddf.getSummary().length); IModel logModel = ddf.ML.train("logisticRegressionWithSGD", 10, 0.1); long[][] cm = ddf.ML.getConfusionMatrix(logModel, 0.5);
/**
 * Runs the standard-scaling transform on the shared {@code ddf} fixture and checks that the
 * result has 31 rows and a summary with 8 entries.
 *
 * @throws DDFException propagated from the transform or the DDF queries
 */
@Test
public void testTransformScaleStandard() throws DDFException {
  DDF standardized = ddf.Transform.transformScaleStandard();

  Assert.assertEquals(31, standardized.getNumRows());

  Summary[] summaries = standardized.getSummary();
  Assert.assertEquals(8, summaries.length);
}
/**
 * Runs the min-max scaling transform on the shared {@code ddf} fixture and checks the summary
 * of the first column of the result: its minimum is below 1 and its maximum equals 1.
 *
 * @throws DDFException propagated from the transform or the summary computation
 */
@Test
public void testTransformScaleMinMax() throws DDFException {
  DDF rescaled = ddf.Transform.transformScaleMinMax();
  Summary firstColumn = rescaled.getSummary()[0];

  Assert.assertTrue(firstColumn.min() < 1);
  Assert.assertTrue(firstColumn.max() == 1);
}
/**
 * Exercises the manager end to end against the airline table: raw SQL row retrieval, a
 * count query, creation of a DDF from a 14-column projection, and registration of that DDF
 * under the name "myddf" followed by a lookup by UUID.
 *
 * @throws DDFException propagated from the manager and DDF calls
 */
@Test
public void testSimpleSparkDDFManager() throws DDFException {
  createTableAirline();

  // Plain SQL: every row of the table comes back.
  List<String> allRows = manager.sql("select * from airline", false).getRows();
  Assert.assertEquals(31, allRows.size());

  // Aggregate SQL: a single row holding the count as text.
  List<String> countRows = manager.sql("select count(*) from airline", false).getRows();
  Assert.assertEquals(1, countRows.size());
  Assert.assertEquals("31", countRows.get(0));

  String projection = "select year, month, dayofweek, deptime, arrtime,origin, distance, arrdelay, "
      + "depdelay, carrierdelay, weatherdelay, nasdelay, securitydelay, lateaircraftdelay from airline";
  DDF airlineDdf = manager.sql2ddf(projection, false);
  Assert.assertEquals(14, airlineDdf.getSummary().length);

  // Register the DDF and make sure it can be fetched back by its UUID.
  manager.setDDFName(airlineDdf, "myddf");
  manager.addDDF(airlineDdf);
  Assert.assertEquals(airlineDdf, manager.getDDF(airlineDdf.getUUID()));
}
} // closes the enclosing scope opened above this excerpt
/**
 * Builds a DDF with a {@code floor(...)} column and a column cast to {@code bigint} from the
 * airline table, then checks that the summary of each column has a non-NaN minimum and a
 * positive count.
 *
 * @throws DDFException propagated from the manager or the summary computation
 */
@Test
public void testSummaryBigInt() throws DDFException {
  DDF ddf4 = manager.sql2ddf("select floor(deptime/100) as dephour, cast(arrdelay as "
      + "bigint) as arrdelay1 from airline", false);
  Summary[] summary = ddf4.getSummary();

  // NaN never compares equal to anything, so "x != Double.NaN" is always true and would
  // make these checks vacuous; Double.isNaN is the only reliable test.
  Assert.assertFalse(Double.isNaN(summary[0].min()));
  Assert.assertTrue(summary[0].count() > 0);

  Assert.assertFalse(Double.isNaN(summary[1].min()));
  Assert.assertTrue(summary[1].count() > 0);
}
// Excerpt from a test whose enclosing method is outside this view.
// Checks that ddf has 31 rows, 10 columns and a 10-entry summary, and that ddf3 has
// 6 columns, a 6-entry summary, and a column named "speed" at index 5.
Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(10, ddf.getNumColumns()); Assert.assertEquals(10, ddf.getSummary().length); Assert.assertEquals(6, ddf3.getNumColumns()); Assert.assertEquals("speed", ddf3.getColumnName(5)); Assert.assertEquals(6, ddf3.getSummary().length);