/** Returns the number of columns in this DDF's schema (delegates to the schema handler). */
public int getNumColumns() { return this.getSchemaHandler().getNumColumns(); }
/** Marks the column at {@code columnIndex} as a factor (categorical) column; delegates to the schema handler. */
public Factor<?> setAsFactor(int columnIndex) { return this.getSchemaHandler().setAsFactor(columnIndex); }
/** Marks the named column as a factor (categorical) column; delegates to the schema handler. */
public Factor<?> setAsFactor(String columnName) { return this.getSchemaHandler().setAsFactor(columnName); }
/** Removes the factor designation from the named column; delegates to the schema handler. */
public void unsetAsFactor(String columnName) { this.getSchemaHandler().unsetAsFactor(columnName); }
/** Returns this DDF's schema (delegates to the schema handler); may be null until one is set or generated. */
public Schema getSchema() { return this.getSchemaHandler().getSchema(); }
/** Removes the factor designation from the column at {@code columnIndex}; delegates to the schema handler. */
public void unsetAsFactor(int columnIndex) { this.getSchemaHandler().unsetAsFactor(columnIndex); }
/**
 * The base implementation checks if the schema is null, and if so, generates a generic one. This is
 * useful/necessary before persistence, to avoid the situation of null schemas being persisted.
 */
@Override
public void beforePersisting() throws DDFException {
  // Lazily generate a schema so a DDF is never persisted with a null schema
  if (this.getSchema() == null) this.getSchemaHandler().setSchema(this.getSchemaHandler().generateSchema());
}
/**
 * Collects the columns of this DDF whose type is numeric (BIGINT, DOUBLE, INT or FLOAT).
 *
 * @return the numeric columns in schema order; empty if there are none
 */
private List<Column> getNumericColumns() {
  final List<Schema.ColumnType> numericTypes =
      Arrays.asList(ColumnType.BIGINT, ColumnType.DOUBLE, ColumnType.INT, ColumnType.FLOAT);
  final List<Column> result = new ArrayList<Column>();
  for (Column candidate : this.getDDF().getSchemaHandler().getColumns()) {
    if (!numericTypes.contains(candidate.getType())) {
      continue; // skip non-numeric columns
    }
    result.add(candidate);
  }
  return result;
}
/**
 * Collects the columns of this DDF that are classified as factors (categorical columns).
 *
 * @return the factor columns in schema order; empty if there are none
 */
private List<Column> getCategoricalColumns() {
  final List<Column> factors = new ArrayList<Column>();
  for (Column candidate : this.getDDF().getSchemaHandler().getColumns()) {
    if (candidate.getColumnClass() != Schema.ColumnClass.FACTOR) {
      continue; // only FACTOR columns qualify
    }
    factors.add(candidate);
  }
  return factors;
}
/**
 * Loads a previously persisted object from the {@code .dat} file for the given namespace/name,
 * together with its schema from the companion {@code .sch} file.
 *
 * @param namespace the namespace the object was persisted under
 * @param name      the name the object was persisted under
 * @return the loaded persistible; if it is a DDF and the companion file held a Schema, that schema is installed on it
 * @throws DDFException if either file deserializes to null, or the object is not an IPersistible
 */
@Override
public IPersistible load(String namespace, String name) throws DDFException {
  final Object loadedObject = JsonSerDes.loadFromFile(this.getFilePath(namespace, name, ".dat"));
  if (loadedObject == null) {
    throw new DDFException(String.format("Got null for IPersistible for %s/%s", namespace, name));
  }
  final Object schema = JsonSerDes.loadFromFile(this.getFilePath(namespace, name, ".sch"));
  if (schema == null) {
    throw new DDFException(String.format("Got null for Schema for %s/%s", namespace, name));
  }
  if (!(loadedObject instanceof IPersistible)) {
    throw new DDFException("Expected object to be IPersistible, got " + loadedObject.getClass());
  }
  // Attach the schema only when both sides have the expected runtime types
  if (loadedObject instanceof DDF && schema instanceof Schema) {
    ((DDF) loadedObject).getSchemaHandler().setSchema((Schema) schema);
  }
  return (IPersistible) loadedObject;
}
private DDF createDDFWrapper() throws DDFException { DDF ddf = this.newContainerDDFImpl(); if (ddf == null) throw new DDFException(String.format("Cannot create new container DDF for %s: %s/%s", this.getClass(), this.getNamespace(), this.getName())); // Make sure we have a namespace and name if (Strings.isNullOrEmpty(this.getName())) this.setName(ddf.getSchemaHandler().newTableName(this)); // Make sure the DDF's names match ours ddf.getManager().setDDFName(ddf, this.getName()); return ddf; }
/** * Transfer factor information from ddf to this DDF * @param ddf * @param columns Columns to re-compute factors * @throws DDFException */ public void copyFactor(DDF ddf, List<String> columns) throws DDFException { // if there is no columns to recompute factor info if (columns == null) { columns = new ArrayList<String>(); } for (Schema.Column col : ddf.getSchema().getColumns()) { if (this.getDDF().getColumn(col.getName()) != null && col.getColumnClass() == Schema.ColumnClass.FACTOR) { // Set corresponding column as factor this.getDDF().getSchemaHandler().setAsFactor(col.getName()); // if not in list of columns to re-compute factors // then we just copy existing factor info to the new ones if (!columns.contains(col.getName())) { // copy existing factor column info this.getDDF().getSchemaHandler().setFactorLevels(col.getName(), col.getOptionalFactor()); } } } this.getDDF().getSchemaHandler().computeFactorLevelsAndLevelCounts(); }
// Snapshot the schema's columns; trainedColumns is sized to hold one name per column.
// NOTE(review): fragment — the enclosing method is not visible here, so how trainedColumns
// is filled cannot be confirmed from this view.
List<Schema.Column> columns = this.getDDF().getSchemaHandler().getColumns(); String[] trainedColumns = new String[columns.size()];
@Ignore public void testTransformMapReduceNative() throws DDFException { // aggregate sum of month group by year String mapFuncDef = "function(part) { keyval(key=part$year, val=part$month) }"; String reduceFuncDef = "function(key, vv) { keyval.row(key=key, val=sum(vv)) }"; DDF newddf = ddf.Transform.transformMapReduceNative(mapFuncDef, reduceFuncDef); System.out.println("name " + ddf.getName()); System.out.println("newname " + newddf.getName()); Assert.assertNotNull(newddf); Assert.assertTrue(newddf.getColumnName(0).equals("key")); Assert.assertTrue(newddf.getColumnName(1).equals("val")); Assert.assertTrue(newddf.getSchemaHandler().getColumns().get(0).getType() == ColumnType.STRING); Assert.assertTrue(newddf.getSchemaHandler().getColumns().get(1).getType() == ColumnType.INT); }
/**
 * Initialization to be done after constructor assignments, such as setting of the all-important DDFManager.
 *
 * @param manager   the owning DDFManager; installed first because later steps may need it
 * @param data      underlying representation data, registered only when {@code typeSpecs} is non-null
 * @param typeSpecs representation type specification for {@code data}; skipped when null
 * @param name      optional DDF name; applied only when non-empty
 * @param schema    schema to install; when non-null but lacking a table name, a fresh one is generated
 * @throws DDFException
 */
protected void initialize(DDFManager manager, Object data, Class<?>[] typeSpecs, String name, Schema schema)
    throws DDFException {
  this.validateSchema(schema);
  this.setManager(manager); // this must be done first in case later stuff needs a manager
  if (typeSpecs != null) {
    this.getRepresentationHandler().set(data, typeSpecs);
  }
  this.getSchemaHandler().setSchema(schema);
  // Backfill a generated table name so the schema is never left nameless
  if (schema != null && schema.getTableName() == null) {
    String tableName = this.getSchemaHandler().newTableName();
    schema.setTableName(tableName);
  }
  // Assign identity: UUID always, human-readable name only when one was provided
  manager.setDDFUUID(this, UUID.randomUUID());
  if (!Strings.isNullOrEmpty(name)) manager.setDDFName(this, name);
  // Facades
  this.ML = new MLFacade(this, this.getMLSupporter());
  this.VIEWS = new ViewsFacade(this, this.getViewHandler());
  this.Transform = new TransformFacade(this, this.getTransformationHandler());
  this.R = new RFacade(this, this.getAggregationHandler());
}
/**
 * Replaces this DDF's contents — representations, factor information, and schema — with those of
 * {@code newddf}, mutating this DDF in place.
 *
 * @param newddf the DDF whose contents are copied into this one
 * @return this DDF (the mutated current DDF)
 * @throws DDFException
 */
@Override
public DDF updateInplace(DDF newddf) throws DDFException {
  // copy content of newddf to this ddf
  DDF curDDF = this.getDDF();
  curDDF.getRepresentationHandler().reset();
  curDDF.getRepresentationHandler().setRepresentations(newddf.getRepresentationHandler().getAllRepresentations());
  // NOTE(review): factors are copied BEFORE the schema swap — the ordering looks intentional
  // (copyFactor reads this DDF's pre-swap columns); confirm before reordering.
  newddf.getMetaDataHandler().copyFactor(this.getDDF());
  curDDF.getSchemaHandler().setSchema(newddf.getSchema());
  return curDDF;
}
}
// NOTE(review): fragment — the enclosing test method is not visible here.
// dayofweek is expected to be a factor with two distinct levels on both transformed DDFs.
Assert.assertEquals(ColumnClass.FACTOR, newddf.getSchemaHandler().getColumn("dayofweek").getColumnClass()); Assert.assertEquals(2, newddf.getSchemaHandler().getColumn("dayofweek").getOptionalFactor().getLevelMap().size()); Assert.assertEquals(ColumnClass.FACTOR, newddf2.getSchemaHandler().getColumn("dayofweek").getColumnClass()); Assert.assertEquals(2, newddf2.getSchemaHandler().getColumn("dayofweek").getOptionalFactor().getLevelMap().size());
// month became a factor (presumably via a binning transform, given the "[2,4]" interval label);
// recompute levels, then check the binned level's presence and count.
Assert.assertTrue(ddf1.getSchemaHandler().getColumn("month").getColumnClass() == ColumnClass.FACTOR); ddf1.getSchemaHandler().computeFactorLevelsAndLevelCounts(); Assert.assertTrue(ddf1.getSchemaHandler().getColumn("month").getOptionalFactor().getLevelMap().get("[2,4]") > 0); Assert.assertEquals(ddf1.getSchemaHandler().getColumn("month").getOptionalFactor().getLevelCounts().get("[2,4]"), 6, 0);
// The transformed columns should still be queryable and non-empty via SQL.
Assert.assertFalse(Strings.isNullOrEmpty(newddf.sql("select dayofweek from @this", "").getRows().get(0))); Assert.assertFalse(Strings.isNullOrEmpty(ddf1.sql("select month from @this", "").getRows().get(0)));
// Round-trip the factor level map through a MetaInfo built from the column's name and type.
Column col = ddf1.getSchemaHandler().getColumn("month"); MetaInfo mi = new MetaInfo(col.getName(), col.getType().toString().toLowerCase()); mi = mi.setFactor(col.getOptionalFactor().getLevelMap());