/**
 * Example entry point: recreates the {@code airline} table through a Spark-engine
 * {@link DDFManager}, loads {@code resources/test/airline.csv} into it, and prints the
 * row and column counts of the resulting DDF.
 *
 * @param args command-line arguments (not read)
 * @throws DDFException if any of the manager or DDF calls fails
 */
public static void main(String[] args) throws DDFException {
  final String dropTableSql = "drop table if exists airline";
  final String createTableSql = "create table airline (Year int,Month int,DayofMonth int,"
      + "DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int,"
      + "CRSArrTime int,UniqueCarrier string, FlightNum int, "
      + "TailNum string, ActualElapsedTime int, CRSElapsedTime int, "
      + "AirTime int, ArrDelay int, DepDelay int, Origin string, "
      + "Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int, "
      + "CancellationCode string, Diverted string, CarrierDelay int, "
      + "WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int ) "
      + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','";
  final String loadDataSql = "load data local inpath 'resources/test/airline.csv' into table airline";

  DDFManager manager = DDFManager.get(DDFManager.EngineType.SPARK);

  // Start from a clean table, then define and populate it.
  manager.sql(dropTableSql, false);
  manager.sql(createTableSql, false);
  manager.sql(loadDataSql, false);

  DDF airline = manager.sql2ddf("SELECT * FROM AIRLINE", false);

  // Both counts are fetched before anything is printed.
  long rowCount = airline.getNumRows();
  int columnCount = airline.getNumColumns();

  System.out.println("Number of data row is " + rowCount);
  System.out.println("Number of data columns is " + columnCount);
}
}
/**
 * Inner-joins {@code left_ddf} with {@code right_ddf} on the {@code cyl} column, logs the
 * joined column names, and expects 25 rows in the result.
 */
@Test
public void testInnerJoin() throws DDFException {
  DDF joined = left_ddf.join(right_ddf, JoinType.INNER, Arrays.asList("cyl"), null, null);

  LOG.info("Column names: " + joined.getColumnNames());

  long joinedRowCount = joined.getNumRows();
  Assert.assertEquals(25, joinedRowCount);
}
/**
 * Applies {@code transformScaleStandard} to the fixture DDF and checks that the result
 * has 31 rows and a summary array of length 8.
 */
@Test
public void testTransformScaleStandard() throws DDFException {
  DDF scaled = ddf.Transform.transformScaleStandard();

  long scaledRowCount = scaled.getNumRows();
  Assert.assertEquals(31, scaledRowCount);

  // The summary is only requested once the row-count check has passed.
  int summaryLength = scaled.getSummary().length;
  Assert.assertEquals(8, summaryLength);
}
@Test public void testLoadFromJDBC() throws DDFException, URISyntaxException { // load data from a MySQL JDBCDataSourceDescriptor desc = new JDBCDataSourceDescriptor("jdbc:mysql://localhost:3306/test", "pauser", "papwd", "mtcars"); DDF ddf = manager.load(desc); Assert.assertTrue(ddf != null); Assert.assertEquals(ddf.getNumColumns(), 11); Assert.assertEquals(ddf.getNumRows(), 32); } }
/**
 * Subsets the airline table to the {@code origin} column with a {@code grep_ic} operator
 * whose operands are the string "iad" and that column, and expects two rows back.
 */
@Test
public void testSubsettingWithGrepIgnoreCase() throws DDFException {
  createTableAirline();
  DDF airline = manager.sql2ddf("select * from airline", false);

  // Column to project and to match against.
  Column originColumn = new Column();
  originColumn.setName("origin");
  List<Column> projection = Lists.newArrayList();
  projection.add(originColumn);

  // Lower-case pattern; the operator used here is grep_ic.
  StringVal pattern = new StringVal();
  pattern.setValue("iad");

  Operator filter = new Operator();
  filter.setName(OperationName.grep_ic);
  filter.setOperarands(new Expression[] { pattern, originColumn });

  DDF matched = airline.VIEWS.subset(projection, filter);
  Assert.assertEquals(2, matched.getNumRows());
}
}
/**
 * Subsets the airline table to the {@code origin} column with a {@code grep} operator
 * whose operands are the string "IAD" and that column, and expects two rows back.
 */
@Test
public void testSubsettingWithGrep() throws DDFException {
  createTableAirline();
  DDF source = manager.sql2ddf("select * from airline", false);

  Column origin = new Column();
  origin.setName("origin");

  StringVal needle = new StringVal();
  needle.setValue("IAD");

  // Operand order is pattern first, then the column it is applied to.
  Expression[] grepOperands = { needle, origin };
  Operator grepOperator = new Operator();
  grepOperator.setName(OperationName.grep);
  grepOperator.setOperarands(grepOperands);

  List<Column> selectedColumns = Lists.newArrayList();
  selectedColumns.add(origin);

  DDF subset = source.VIEWS.subset(selectedColumns, grepOperator);
  Assert.assertEquals(2, subset.getNumRows());
}
/**
 * Checks the summary of the fixture DDF (14 entries, 31 rows), then creates the
 * {@code smiths2} table and verifies that the summary of its third column reports
 * four NA values.
 *
 * @throws DDFException if any DDF operation fails
 */
@Test
public void testSummary() throws DDFException {
  Assert.assertEquals(14, ddf.getSummary().length);
  Assert.assertEquals(31, ddf.getNumRows());

  createTableSmiths2();
  DDF ddf3 = manager.sql2ddf("select * from smiths2", false);
  Summary[] summary = ddf3.getSummary();

  // assertEquals takes (expected, actual); the original had them swapped,
  // which inverts the "expected ... but was ..." failure message.
  Assert.assertEquals(4, summary[2].NACount());
}
ddf = ddf.Transform.transformUDF("dist= round(distance/2, 2)"); Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(9, ddf.getNumColumns()); Assert.assertEquals("dist", ddf.getColumnName(8)); Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(10, ddf.getNumColumns()); Assert.assertEquals(10, ddf.getSummary().length); Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(5, ddf.getNumColumns()); Assert.assertEquals("speed", ddf.getColumnName(4)); Assert.assertEquals(31, ddf3.getNumRows()); Assert.assertEquals(6, ddf3.getNumColumns()); Assert.assertEquals("speed", ddf3.getColumnName(5)); TransformationHandler.RToSqlUdf(s3)); DDF ddf2 = ddf.Transform.transformUDF(s1, lcols); Assert.assertEquals(31, ddf2.getNumRows()); Assert.assertEquals(6, ddf2.getNumColumns());
/**
 * Exercises NA dropping on the fixture DDF: the default {@code dropNA()} is expected to
 * leave 9 rows, {@code dropNA(Axis.COLUMN)} 22 columns, and the missing-data handler
 * called with {@code Axis.COLUMN} and {@code NAChecking.ALL} 29 columns.
 */
@Test
public void testDropNA() throws DDFException {
  // Both derived DDFs are produced before any assertion runs.
  DDF withoutNaRows = ddf.dropNA();
  DDF withoutNaColumns = ddf.dropNA(Axis.COLUMN);

  long remainingRows = withoutNaRows.getNumRows();
  Assert.assertEquals(9, remainingRows);

  int remainingColumns = withoutNaColumns.getNumColumns();
  Assert.assertEquals(22, remainingColumns);

  int columnsAfterDropAll = ddf.getMissingDataHandler()
      .dropNA(Axis.COLUMN, NAChecking.ALL, 0, null)
      .getNumColumns();
  Assert.assertEquals(29, columnsAfterDropAll);
}
long numrows = this.getDDF().getNumRows(); if (thresh > 0) { if (thresh > numrows) {
/**
 * Marks {@code year} and {@code month} as factor columns, then adds a derived column via
 * {@code transformUDF} on the (mutable) DDF and verifies that both columns are still
 * classified as {@code FACTOR} and still carry factor levels afterwards.
 *
 * @throws DDFException if any DDF operation fails
 */
@Test
public void testReservedFactor() throws DDFException {
  ddf.setAsFactor("year");
  ddf.setAsFactor("month");
  Assert.assertNotNull(ddf.getSchema());

  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());

  // assertEquals reports the actual column class on failure, which assertTrue(a == b) does not.
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("year").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("month").getColumnClass());

  ddf.setMutable(true);
  ddf = ddf.Transform.transformUDF("test123= round(distance/2, 2)");

  // The transform appends one column at index 8 and leaves the row count untouched.
  Assert.assertEquals(31, ddf.getNumRows());
  Assert.assertEquals(9, ddf.getNumColumns());
  Assert.assertEquals("test123", ddf.getColumnName(8));
  Assert.assertEquals(9, ddf.VIEWS.head(1).get(0).split("\\t").length);

  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());

  // The factor classification and its levels must still be present after the transform.
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("year").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("month").getColumnClass());
  Assert.assertTrue(ddf.getColumn("year").getOptionalFactor().getLevels().size() > 0);
  Assert.assertTrue(ddf.getColumn("month").getOptionalFactor().getLevels().size() > 0);

  System.out.println(">>>>>>>>>>>>> " + ddf.getSchema().getColumns());
}