/**
 * Ensures a schema is present before persistence: if none has been set,
 * a generic one is generated, so that a null schema is never persisted.
 */
@Override
public void beforePersisting() throws DDFException {
  if (this.getSchema() == null) {
    this.getSchemaHandler().setSchema(this.getSchemaHandler().generateSchema());
  }
}
/**
 * Looks up the index of the named column in this DDF's schema.
 *
 * @param columnName name of the column to locate
 * @return the column's index as reported by the schema
 */
public int getColumnIndex(String columnName) {
  return this.getSchema().getColumnIndex(columnName);
}
/** * Transfer factor information from ddf to this DDF * @param ddf * @param columns Columns to re-compute factors * @throws DDFException */ public void copyFactor(DDF ddf, List<String> columns) throws DDFException { // if there is no columns to recompute factor info if (columns == null) { columns = new ArrayList<String>(); } for (Schema.Column col : ddf.getSchema().getColumns()) { if (this.getDDF().getColumn(col.getName()) != null && col.getColumnClass() == Schema.ColumnClass.FACTOR) { // Set corresponding column as factor this.getDDF().getSchemaHandler().setAsFactor(col.getName()); // if not in list of columns to re-compute factors // then we just copy existing factor info to the new ones if (!columns.contains(col.getName())) { // copy existing factor column info this.getDDF().getSchemaHandler().setFactorLevels(col.getName(), col.getOptionalFactor()); } } } this.getDDF().getSchemaHandler().computeFactorLevelsAndLevelCounts(); }
/** Returns the name of the column at {@code columnIndex}, per this DDF's schema. */
public String getColumnName(int columnIndex) {
  return getSchema().getColumnName(columnIndex);
}
/**
 * Replaces the content of this handler's DDF with that of {@code newddf} in
 * place: representations are reset and copied over, factor information is
 * transferred, and the schema is replaced. The DDF object identity is kept.
 *
 * @param newddf the DDF whose content replaces this DDF's content
 * @return this handler's DDF, now carrying newddf's content
 * @throws DDFException if copying factors or setting the schema fails
 */
@Override public DDF updateInplace(DDF newddf) throws DDFException {
  //copy content of newddf to this ddf
  DDF curDDF = this.getDDF();
  curDDF.getRepresentationHandler().reset();
  curDDF.getRepresentationHandler().setRepresentations(newddf.getRepresentationHandler().getAllRepresentations());
  // NOTE(review): factor copy happens before the schema swap — ordering
  // appears intentional; confirm before reordering.
  newddf.getMetaDataHandler().copyFactor(this.getDDF());
  curDDF.getSchemaHandler().setSchema(newddf.getSchema());
  return curDDF;
}
}
/** Returns the table name recorded in this DDF's schema. */
public String getTableName() {
  return getSchema().getTableName();
}
// NOTE(review): fragment of a test method — begins and ends mid-scope in this
// view, so only comments are added here.
Assert.assertTrue(mi.hasFactor());
// Regenerate meta info from the transformed DDF's schema and scan for the
// "dayofweek" column.
MetaInfo[] m = generateMetaInfo(newddf.getSchema());
for (int i = 0; i < m.length; i++) {
  if (m[i].getHeader().equals("dayofweek")) {
/** Returns the column names held by this DDF's schema. */
public List<String> getColumnNames() {
  return getSchema().getColumnNames();
}
/** Looks up a column in this DDF's schema by name. */
public Column getColumn(String column) {
  return getSchema().getColumn(column);
}
/** Replaces the column names in this DDF's schema. */
public void setColumnNames(List<String> columnNames) {
  getSchema().setColumnNames(columnNames);
}
/**
 * Resolves each expression column to its schema-level column metadata,
 * preserving order. The fully-qualified type is required because the plain
 * {@code Column} name in this scope is the expression column type.
 */
private io.ddf.content.Schema.Column[] selectColumnMetaInfo(List<Column> columns, DDF ddf) {
  io.ddf.content.Schema.Column[] metaInfo = new io.ddf.content.Schema.Column[columns.size()];
  for (int idx = 0; idx < metaInfo.length; idx++) {
    metaInfo[idx] = ddf.getSchema().getColumn(columns.get(idx).getIndex());
  }
  return metaInfo;
}
/**
 * Computes quantiles for a single-column ("vector") DDF.
 *
 * @param percentiles the percentiles to evaluate
 * @return the quantile values for the DDF's only column
 * @throws DDFException if this DDF does not have exactly one column
 */
public Double[] getVectorQuantiles(Double[] percentiles) throws DDFException {
  if (getSchema().getNumColumns() != 1) {
    throw new DDFException("This method only applies to one columned DDF.");
  }
  String columnName = getSchema().getColumn(0).getName();
  return this.getStatisticsSupporter().getVectorQuantiles(columnName, percentiles);
}
@Override public DDF transformScaleMinMax() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()).append(" "); } else { // subtract min, divide by (max - min) sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].min(), (summaryArr[i].max() - summaryArr[i].min()), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
@Override public DDF transformScaleStandard() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()); } else { // subtract mean, divide by stdev sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].mean(), summaryArr[i].stdev(), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
/**
 * Fills in missing column names in an expression tree. A Column node whose
 * name is null but whose index is set gets its name resolved from the DDF's
 * schema; Operator nodes are walked recursively.
 *
 * @param expression the expression (sub)tree to fix up; may be null
 * @param ddf the DDF whose schema supplies column names
 */
protected void updateVectorName(Expression expression, DDF ddf) {
  if (expression == null) {
    return;
  }
  if (expression.getType().equals("Column")) {
    Column column = (Column) expression;
    if (column.getName() == null) {
      Integer index = column.getIndex();
      if (index != null) {
        column.setName(ddf.getSchema().getColumnName(index));
      }
    }
  } else if (expression instanceof Operator) {
    // Recurse into each operand of the operator node.
    for (Expression operand : ((Operator) expression).getOperands()) {
      updateVectorName(operand, ddf);
    }
  }
}
/**
 * Applies R-style transform expressions to this DDF by translating them to a
 * SQL SELECT (via RToSqlUdf) and executing it.
 *
 * @param RExps R-style expressions to translate into SQL
 * @param columns columns passed through to the UDF translation
 * @return this DDF updated in place when it is mutable; otherwise a new DDF
 *         with factor info copied over
 * @throws DDFException if SQL execution or the in-place update fails
 */
public DDF transformUDF(List<String> RExps, List<String> columns) throws DDFException {
  // "{1}" is a literal table-name placeholder substituted downstream —
  // NOTE(review): confirm against SQLDataSourceDescriptor/sql2ddf handling.
  String sqlCmd = String.format("SELECT %s FROM %s",
      RToSqlUdf(RExps, columns, this.getDDF().getSchema().getColumns()), "{1}");
  DDF newddf = this.getManager().sql2ddf(sqlCmd, new SQLDataSourceDescriptor(sqlCmd, null, null, null, this.getDDF()
      .getUUID().toString()));
  if (this.getDDF().isMutable()) {
    return this.getDDF().updateInplace(newddf);
  } else {
    newddf.getMetaDataHandler().copyFactor(this.getDDF());
    return newddf;
  }
}
/**
 * Round-trips a Model through DDF serialization: serialize to a DDF,
 * persist it, reload it, deserialize back, and assert the raw model type
 * survives. (Currently ignored.)
 */
@Ignore
public void testModelSerialize2DDF() throws DDFException {
  DummyModel dummyModel = new DummyModel(20, "dummymodel2");
  Model model = new Model(dummyModel);
  DDFManager manager = DDFManager.get(DDFManager.EngineType.BASIC);
  DDF ddf = model.serialize2DDF(manager);
  // The serialized model is held as a representation of List<String>.
  Object obj = ddf.getRepresentationHandler().get(List.class, String.class);
  List<Schema.Column> cols = ddf.getSchema().getColumns();
  List<String> lsString = (List<String>) obj;
  Assert.assertTrue(obj != null);
  Assert.assertTrue(obj instanceof List);
  Assert.assertTrue(ddf != null);
  // Persist, then reload through a fresh PersistenceHandler.
  APersistenceHandler.PersistenceUri uri = ddf.persist();
  PersistenceHandler pHandler = new PersistenceHandler(null);
  DDF ddf2 = (DDF) pHandler.load(uri);
  Model model2 = Model.deserializeFromDDF((BasicDDF) ddf2);
  Assert.assertTrue(ddf2 != null);
  Assert.assertTrue(model2 != null);
  Assert.assertTrue(model2.getRawModel() instanceof DummyModel);
}
}
/**
 * Verifies that factor columns ("year", "month") keep their FACTOR class
 * and non-empty levels after a mutable transformUDF adds a new column.
 */
@Test
public void testReservedFactor() throws DDFException {
  ddf.setAsFactor("year");
  ddf.setAsFactor("month");
  Assert.assertTrue(ddf.getSchema() != null);
  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());
  Assert.assertTrue(ddf.getColumn("year").getColumnClass() == Schema.ColumnClass.FACTOR);
  Assert.assertTrue(ddf.getColumn("month").getColumnClass() == Schema.ColumnClass.FACTOR);
  // Mutable transform: "test123" is appended to this DDF in place.
  ddf.setMutable(true);
  ddf = ddf.Transform.transformUDF("test123= round(distance/2, 2)");
  // Expected counts assume the fixture dataset loaded by the suite:
  // 31 rows, 8 original columns plus the new one at index 8.
  Assert.assertEquals(31, ddf.getNumRows());
  Assert.assertEquals(9, ddf.getNumColumns());
  Assert.assertEquals("test123", ddf.getColumnName(8));
  Assert.assertEquals(9, ddf.VIEWS.head(1).get(0).split("\\t").length);
  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());
  // Factor class and levels must survive the in-place update.
  Assert.assertTrue(ddf.getColumn("year").getColumnClass() == Schema.ColumnClass.FACTOR);
  Assert.assertTrue(ddf.getColumn("month").getColumnClass() == Schema.ColumnClass.FACTOR);
  Assert.assertTrue(ddf.getColumn("year").getOptionalFactor().getLevels().size() > 0);
  Assert.assertTrue(ddf.getColumn("month").getOptionalFactor().getLevels().size() > 0);
  System.out.println(">>>>>>>>>>>>> " + ddf.getSchema().getColumns());
}
/**
 * Persists this handler's DDF to storage as two files: a data file
 * (serialized DDF) and a schema file (serialized schema).
 *
 * @param doOverwrite when false, fails if either target file already exists;
 *                    when true, any existing target files are deleted first
 * @return a PersistenceUri pointing at the written data file
 * @throws DDFException if the DDF is null, a target exists while
 *                      doOverwrite is false, or any I/O step fails
 */
@Override
public APersistenceHandler.PersistenceUri persist(boolean doOverwrite) throws DDFException {
  if (this.getDDF() == null) throw new DDFException("DDF cannot be null");
  String dataFile = this.getDataFileName();
  String schemaFile = this.getSchemaFileName();
  // Pre-check existence so we can fail before doing any work.
  try {
    if (!doOverwrite && (Utils.fileExists(dataFile) || Utils.fileExists(schemaFile))) {
      throw new DDFException("DDF already exists in persistence storage, and overwrite option is false");
    }
  } catch (IOException e) {
    throw new DDFException(e);
  }
  try {
    // Lets the DDF prepare itself, e.g. generate a missing schema.
    this.getDDF().beforePersisting();
    //if overwrite and existed
    if (doOverwrite && (Utils.fileExists(dataFile) || Utils.fileExists(schemaFile))) {
      if(Utils.fileExists(dataFile)) Utils.deleteFile(dataFile);
      if(Utils.fileExists(schemaFile)) Utils.deleteFile(schemaFile);
    }
    Utils.writeToFile(dataFile, JsonSerDes.serialize(this.getDDF()) + '\n');
    Utils.writeToFile(schemaFile, JsonSerDes.serialize(this.getDDF().getSchema()) + '\n');
    this.getDDF().afterPersisting();
  } catch (Exception e) {
    // Re-throw DDFExceptions as-is; wrap everything else, preserving the cause.
    if (e instanceof DDFException) throw (DDFException) e;
    else throw new DDFException(e);
  }
  return new PersistenceUri(this.getDDF().getEngine(), dataFile);
}