/**
 * Example entry point: recreates the {@code airline} table through a Spark-engine
 * {@link DDFManager}, loads {@code resources/test/airline.csv} into it, wraps the whole
 * table in a DDF and prints the DDF's row and column counts to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @throws DDFException propagated from any of the manager or DDF calls
 */
public static void main(String[] args) throws DDFException {
  final String dropStatement = "drop table if exists airline";
  final String createStatement = "create table airline (Year int,Month int,DayofMonth int,"
      + "DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int,"
      + "CRSArrTime int,UniqueCarrier string, FlightNum int, "
      + "TailNum string, ActualElapsedTime int, CRSElapsedTime int, "
      + "AirTime int, ArrDelay int, DepDelay int, Origin string, "
      + "Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int, "
      + "CancellationCode string, Diverted string, CarrierDelay int, "
      + "WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int ) "
      + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','";
  final String loadStatement = "load data local inpath 'resources/test/airline.csv' into table airline";

  DDFManager sparkManager = DDFManager.get(DDFManager.EngineType.SPARK);

  // Start from a clean table, then define it and fill it from the CSV file.
  sparkManager.sql(dropStatement, false);
  sparkManager.sql(createStatement, false);
  sparkManager.sql(loadStatement, false);

  DDF airline = sparkManager.sql2ddf("SELECT * FROM AIRLINE", false);

  // Both dimensions are fetched before anything is printed.
  long rowCount = airline.getNumRows();
  int columnCount = airline.getNumColumns();

  System.out.println("Number of data row is " + rowCount);
  System.out.println("Number of data columns is " + columnCount);
}
}
private DDF handleIndex(String index, List<String> identifierList, Table table) throws Exception { if (!mIndexPattern.matcher(index).matches()) { // Not full uri, no namespace, the index can't match. throw new Exception(">>> ERROR: Can't find the required ddf " + index); } String number = index.substring(index.indexOf('{') + 1, index.indexOf('}')).trim(); int idx = Integer.parseInt(number); if (idx < 1) { throw new Exception("In the SQL command, " + "if you use {number} as index, the number should begin from 1"); } if (idx > identifierList.size()) { throw new Exception(new ArrayIndexOutOfBoundsException()); } else { String identifier = identifierList.get(idx - 1); DDF ddf = null; try { ddf = this.mDDFManager.getDDFByName(identifier); } catch (DDFException e) { ddf = this.mDDFManager.getDDF(UUID.fromString(identifier)); } return ddf; } }
@SuppressWarnings("unchecked") private DDF newDDF(Class<?>[] argTypes, Object[] argValues) throws DDFException { String className = Config.getValueWithGlobalDefault(this.getEngine(), ConfigConstant.FIELD_DDF); if (Strings.isNullOrEmpty(className)) throw new DDFException(String.format( "Cannot determine class name for [%s] %s", this.getEngine(), "DDF")); try { Constructor<DDF> cons = (Constructor<DDF>) Class.forName(className).getDeclaredConstructor(argTypes); if (cons == null) throw new DDFException("Cannot get constructor for " + className); cons.setAccessible(true); // make sure we can use it whether it's // private, protected, or public DDF ddf = cons.newInstance(argValues); if (ddf == null) throw new DDFException("Cannot instantiate a new instance of " + className); this.addDDF(ddf); return ddf; } catch (Exception e) { throw new DDFException(String.format( "While instantiating a new %s DDF of class %s with argTypes %s and argValues %s", this.getEngine(), className, Arrays.toString(argTypes), Arrays.toString(argValues)), e); } }
@Override public SqlTypedResult sqlTyped(String command, Integer maxRows, DataSourceDescriptor dataSource) throws DDFException { // @Note This is another possible solution, which I think is more stable. // return this.getDummyDDF().getSqlHandler().sqlTyped(command, maxRows, dataSource); return new SqlTypedResult(sql(command, maxRows, dataSource)); }
/**
 * Convenience overload that forwards to the four-argument {@code sql2ddf}, supplying
 * {@code null} as its third argument.
 *
 * @param command the SQL text to execute
 * @param schema forwarded unchanged
 * @param dataFormat forwarded unchanged
 * @return whatever the four-argument overload returns
 * @throws DDFException propagated from the four-argument overload
 */
@Override
public DDF sql2ddf(String command, Schema schema, DataFormat dataFormat) throws DDFException {
  return sql2ddf(command, schema, null, dataFormat);
}
/**
 * Exercises plain SQL and DDF creation against the {@code airline} table: checks the row
 * count both by fetching every row and by {@code count(*)}, checks that a 14-column
 * projection yields 14 summary entries, then names and registers the DDF and checks that
 * it can be fetched back by its UUID.
 *
 * @throws DDFException propagated from the manager or DDF calls
 */
@Test
public void testSimpleSparkDDFManager() throws DDFException {
  createTableAirline();

  // Every row of the table, fetched directly.
  List<String> allRows = manager.sql("select * from airline", false).getRows();
  Assert.assertEquals(31, allRows.size());

  // The same count obtained through an aggregate: a single row holding "31".
  List<String> countRows = manager.sql("select count(*) from airline", false).getRows();
  Assert.assertEquals(1, countRows.size());
  Assert.assertEquals("31", countRows.get(0));

  DDF projected = manager.sql2ddf(
      "select year, month, dayofweek, deptime, arrtime,origin, distance, arrdelay, "
          + "depdelay, carrierdelay, weatherdelay, nasdelay, securitydelay, lateaircraftdelay from airline",
      false);
  Assert.assertEquals(14, projected.getSummary().length);

  manager.setDDFName(projected, "myddf");
  manager.addDDF(projected);
  Assert.assertEquals(projected, manager.getDDF(projected.getUUID()));
}
}
private void initialize(String name, T[] data, String engineName) throws DDFException { if (data == null || data.length == 0) throw new DDFException("Cannot initialize a null or zero-length Vector"); DDF newDDF = DDFManager.get(DDFManager.EngineType.fromString(engineName)) // .newDDF(null, (Object) data, new Class[] { Array.class, this.getParameterizedType() },name, // new Schema(name, String.format("%s %s", name, this.getParameterizedType().getSimpleName()))); this.initialize(newDDF, name); }
/**
 * Convenience overload that forwards to {@code sql(String, Integer)} with a {@code null}
 * second argument.
 *
 * @param command the SQL text to execute
 * @return whatever the two-argument overload returns
 * @throws DDFException propagated from the two-argument overload
 */
@Override
public SqlResult sql(String command) throws DDFException {
  // Typed as Integer so the call binds to the (String, Integer) overload.
  Integer unspecified = null;
  return sql(command, unspecified);
}
// Test fragment: obtains the SPARK-engine DDFManager and issues
// "drop table if exists airline"; the remaining statements (create table, load data,
// sql2ddf, assertions, shutdown) sit textually inside the catch block that follows.
// NOTE(review): this snippet looks truncated — the braces opened by the method and the
// catch block are never closed, the "create table" column list has no closing
// parenthesis before "ROW FORMAT", and `cm` is not declared anywhere in the visible
// text. Confirm against the complete source before relying on or changing this test.
@Test public void testConfusionMatrix() throws DDFException { DDFManager manager = DDFManager.get(DDFManager.EngineType.SPARK); try { manager.sql("drop table if exists airline", false); } catch (Exception e) { System.out.println(e); manager.sql("create table airline (Year int,Month int,DayofMonth int," + "DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int," + "CRSArrTime int,UniqueCarrier string, FlightNum int, " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','", false); manager.sql("load data local inpath '../resources/test/airline.csv' into table airline", false); DDF ddf = manager.sql2ddf("select " + "distance, depdelay, if (arrdelay > 10.89, 1, 0) as delayed from airline", false); Assert.assertEquals(3, ddf.getSummary().length); Assert.assertEquals(0, cm[1][0]); Assert.assertEquals(18, cm[1][1]); manager.shutdown();
/** * @brief Test ordinary spark query. * @throws DDFException */ @Test public void testLoading() throws DDFException { SQLDataSourceDescriptor sqlDataSourceDescriptor = new SQLDataSourceDescriptor(null, false, null, null, null); manager.sql("drop table if exists airline", sqlDataSourceDescriptor); manager.sql("create table airline (Year int,Month int,DayofMonth int," + "DayOfWeek int,DepTime int,CRSDepTime int,ArrTime int," + "CRSArrTime int,UniqueCarrier string, FlightNum int, " + "TailNum string, ActualElapsedTime int, CRSElapsedTime int, " + "AirTime int, ArrDelay int, DepDelay int, Origin string, " + "Dest string, Distance int, TaxiIn int, TaxiOut int, Cancelled int, " + "CancellationCode string, Diverted string, CarrierDelay int, " + "WeatherDelay int, NASDelay int, SecurityDelay int, LateAircraftDelay int ) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','", sqlDataSourceDescriptor); manager.sql("load data local inpath '../resources/test/airline.csv' " + "into table airline", sqlDataSourceDescriptor); DDF ddf = manager.sql2ddf("select year, month, dayofweek, deptime, arrtime,origin, distance, arrdelay, " + "depdelay, carrierdelay, weatherdelay, nasdelay, securitydelay, lateaircraftdelay from airline", sqlDataSourceDescriptor); this.manager.setDDFName(ddf, "airlineDDF"); // DDF sql2ddfRet = manager.sql2ddf("select * from " + // "ddf://adatao/airlineDDF"); }
/**
 * Returns the cached dummy DDF, creating it with {@code newDDF(this)} on first use.
 *
 * @return the value of {@code mDummyDDF} after any needed creation
 * @throws DDFException propagated from {@code newDDF}
 */
protected DDF getDummyDDF() throws DDFException {
  if (mDummyDDF != null) {
    return mDummyDDF;
  }

  mDummyDDF = this.newDDF(this);
  return mDummyDDF;
}
@Test public void testLoadConfig() throws Exception { DDFManager manager = DDFManager.get(DDFManager.EngineType.BASIC); // this will trigger a configuration loading Assert.assertEquals("basic", manager.getEngine()); Assert.assertNotNull(Config.getConfigHandler()); Assert.assertNotNull(Config.getConfigHandler().loadConfig()); }
/**
 * Checks that after a DDF is renamed to {@code "myddf"} and assigned a fresh random UUID,
 * looking it up by name returns a DDF whose UUID equals both the original handle's UUID
 * and the newly assigned one.
 *
 * @throws DDFException propagated from the manager calls
 */
@Test
public void testDDFMAnager() throws DDFException {
  DDF ddf = this.getTestDDF();
  DDFManager manager = this.getDDFManager();

  manager.setDDFName(ddf, "myddf");
  UUID newUUID = UUID.randomUUID();
  manager.setDDFUUID(ddf, newUUID);

  DDF ddf1 = manager.getDDFByName(ddf.getName());

  // assertEquals takes (expected, actual): the looked-up DDF is the value under test, so
  // it goes second; otherwise a failure message would label the two values the wrong way round.
  Assert.assertEquals(ddf.getUUID(), ddf1.getUUID());
  Assert.assertEquals(newUUID, ddf1.getUUID());
}
/**
 * Returns the cached manager, obtaining a BASIC-engine {@link DDFManager} on first use.
 *
 * @return the value of {@code mManager} after any needed creation
 * @throws DDFException propagated from {@code DDFManager.get}
 */
private DDFManager getDDFManager() throws DDFException {
  if (mManager != null) {
    return mManager;
  }

  mManager = DDFManager.get(DDFManager.EngineType.BASIC);
  return mManager;
}
/**
 * Loads a persisted object by handing {@code uri} to the manager of the engine that the
 * URI names.
 *
 * @param uri the location to load from; must be non-null and carry a non-empty engine
 * @return the object returned by the selected manager's {@code load}
 * @throws DDFException if {@code uri} is null, if its engine is null or empty, or
 *         propagated from the manager lookup or load
 */
public static IPersistible doLoad(PersistenceUri uri) throws DDFException {
  if (uri == null) {
    throw new DDFException("URI cannot be null");
  }
  if (Strings.isNullOrEmpty(uri.getEngine())) {
    throw new DDFException("Engine/Protocol in URI cannot be missing");
  }

  EngineType engineType = EngineType.fromString(uri.getEngine());
  DDFManager engineManager = DDFManager.get(engineType);
  return engineManager.load(uri);
}
/**
 * Expects a {@link DDFException} when a DDF is looked up under a name it held before being
 * renamed: the DDF is named {@code "myddf1"}, that name is remembered, the DDF is renamed
 * to {@code "myddf2"}, and the remembered name is then looked up.
 *
 * @throws DDFException the expected outcome of the test
 */
@Test(expected = DDFException.class)
public void testRenamingDDF() throws DDFException {
  DDF renamed = this.getTestDDF();
  DDFManager ddfManager = this.getDDFManager();

  ddfManager.setDDFName(renamed, "myddf1");
  String previousName = renamed.getName();

  ddfManager.setDDFName(renamed, "myddf2");

  // The test passes only if a DDFException is thrown somewhere in this method.
  ddfManager.getDDFByName(previousName);
}
/**
 * Looks up the DDF named {@code ddfname} in {@code manager} and forwards it, together
 * with {@code tgtname}, to {@code copyFrom(DDF, String)}.
 *
 * @param manager the manager in which the source DDF is looked up by name
 * @param ddfname name of the source DDF
 * @param tgtname forwarded unchanged to {@code copyFrom(DDF, String)}
 * @return whatever {@code copyFrom(DDF, String)} returns
 * @throws DDFException propagated from the lookup or from the copy
 */
@Override
public DDF copyFrom(DDFManager manager, String ddfname, String tgtname) throws DDFException {
  DDF source = manager.getDDFByName(ddfname);
  return this.copyFrom(source, tgtname);
}
/**
 * Expects a {@link DDFException} when a DDF is looked up by a UUID it held before being
 * reassigned: the test remembers the DDF's UUID, assigns a fresh random UUID through the
 * manager, then looks up the remembered one.
 *
 * @throws DDFException the expected outcome of the test
 */
@Test(expected = DDFException.class)
public void testDDFManagerSetUUID() throws DDFException {
  DDF reassigned = this.getTestDDF();
  UUID previousUUID = reassigned.getUUID();

  DDFManager ddfManager = getDDFManager();
  UUID replacementUUID = UUID.randomUUID();
  ddfManager.setDDFUUID(reassigned, replacementUUID);

  // The test passes only if a DDFException is thrown somewhere in this method.
  ddfManager.getDDF(previousUUID);
}
/**
 * Trains a {@code "collaborativeFiltering"} model on the {@code userid, movieid, score}
 * columns of the {@code ratings} table, prints the prediction for the pair (1, 4) and
 * shuts the manager down. The method is annotated {@code @Ignore}.
 *
 * @throws DDFException propagated from the manager, DDF or training calls
 */
@Ignore
public void TestALS() throws DDFException {
  createTableRatings();
  DDF ratings = manager.sql2ddf("select userid, movieid, score from ratings", false);

  // Training arguments, passed to train() in the order rank, iteration count, lambda.
  final int rank = 3;
  final double lambda = 10;
  final int iterNum = 15;

  Object rawModel = ratings.ML.train("collaborativeFiltering", rank, iterNum, lambda).getRawModel();
  MatrixFactorizationModel model = (MatrixFactorizationModel) rawModel;

  double predicted = model.predict(1, 4);
  System.out.println(">>>RATING: " + predicted);

  manager.shutdown();
}
}