/**
 * Builds the SQL select-expression used to fetch the five-number summary of a column.
 *
 * <p>The expression depends on the column's type as reported by the DDF schema:
 * <ul>
 *   <li>integral: a single {@code PERCENTILE} call over the fractions 0, 1, 0.25, 0.5, 0.75;</li>
 *   <li>fractional: {@code MIN}, {@code MAX} and a {@code PERCENTILE_APPROX} call over
 *       0.25, 0.5, 0.75;</li>
 *   <li>anything else: the empty string.</li>
 * </ul>
 *
 * @param columnName name of the column to summarize; inserted into the expression verbatim
 * @return the SQL expression, or {@code ""} when the column is neither integral nor fractional
 */
private String fiveNumHiveFunction(String columnName) {
  final ColumnType type = this.getDDF().getColumn(columnName).getType();

  if (ColumnType.isIntegral(type)) {
    return String.format("PERCENTILE(%s, array(0, 1, 0.25, 0.5, 0.75))", columnName);
  }

  if (ColumnType.isFractional(type)) {
    return String.format("MIN(%s), MAX(%s), PERCENTILE_APPROX(%s, array(0.25, 0.5, 0.75))",
        columnName, columnName, columnName);
  }

  // Neither integral nor fractional: no summary expression is produced.
  return "";
}
/**
 * Gets the column information of a DDF as a two-column SQL result.
 *
 * <p>Each returned row is the column name and the lower-cased column type, separated by a tab
 * character, in column-index order. The result schema is named {@code table_info} and has the
 * string columns {@code column_name} and {@code value_type}.
 *
 * @param name The URI or the name of the ddf.
 * @return The column information.
 * @throws DDFException declared by this method; presumably raised when the DDF lookup fails
 *     (verify against the manager implementation)
 */
private SqlResult describeTable(String name) throws DDFException {
  DDF ddf = this.getManager().getDDFByName(name);
  int colSize = ddf.getNumColumns();

  List<String> ret = new ArrayList<String>();
  for (int colIdx = 0; colIdx < colSize; ++colIdx) {
    Schema.Column col = ddf.getColumn(ddf.getColumnName(colIdx));
    // Locale.ROOT keeps the type name stable regardless of the JVM's default locale
    // (e.g. a Turkish default locale would otherwise lower-case 'I' to a dotless i).
    String typeName = col.getType().toString().toLowerCase(java.util.Locale.ROOT);
    ret.add(col.getName().concat("\t").concat(typeName));
  }

  List<Column> columnList = new ArrayList<Column>();
  columnList.add(new Column("column_name", Schema.ColumnType.STRING));
  columnList.add(new Column("value_type", Schema.ColumnType.STRING));
  Schema schema = new Schema("table_info", columnList);

  return new SqlResult(schema, ret);
}
if (!Strings.isNullOrEmpty(value)) { // fill by value if (this.getDDF().getColumn(col).isNumeric()) { caseCmd.append(fillNACaseSql(col, value)); } else { if (this.getDDF().getColumn(col).isNumeric()) { caseCmd.append(fillNACaseSql(col, filledValue)); } else { Column curColumn = this.getDDF().getColumn(col); if (this.getDDF().getColumn(col).isNumeric()) { double filledValue = this.getDDF().getAggregationHandler().aggregateOnColumn(function, col); if (curColumn.getType() == ColumnType.DOUBLE) {
/**
 * Checks that columns marked as factors are still factors, with non-empty levels, after the DDF
 * is transformed with a UDF that appends a new column.
 *
 * <p>Uses {@code assertEquals}/{@code assertNotNull} rather than {@code assertTrue} on a
 * comparison so that a failure reports the expected and actual values.
 *
 * @throws DDFException if any DDF operation fails
 */
@Test
public void testReservedFactor() throws DDFException {
  ddf.setAsFactor("year");
  ddf.setAsFactor("month");
  Assert.assertNotNull(ddf.getSchema());

  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("year").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("month").getColumnClass());

  // Append a derived column; the factor metadata is expected to carry over to the result.
  ddf.setMutable(true);
  ddf = ddf.Transform.transformUDF("test123= round(distance/2, 2)");

  Assert.assertEquals(31, ddf.getNumRows());
  Assert.assertEquals(9, ddf.getNumColumns());
  Assert.assertEquals("test123", ddf.getColumnName(8));
  Assert.assertEquals(9, ddf.VIEWS.head(1).get(0).split("\\t").length);

  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("year").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("month").getColumnClass());
  Assert.assertTrue(ddf.getColumn("year").getOptionalFactor().getLevels().size() > 0);
  Assert.assertTrue(ddf.getColumn("month").getOptionalFactor().getLevels().size() > 0);

  System.out.println(">>>>>>>>>>>>> " + ddf.getSchema().getColumns());
}
ColumnType columnType = this.getDDF().getColumn(columnName).getType(); mLog.info("Column type: " + columnType.name());
/**
 * Computes the correlation between two numeric columns of the underlying DDF.
 *
 * <p>Runs {@code SELECT CORR(columnA, columnB) FROM <table>} through the manager, parses the
 * first returned row as a double and passes it through {@code Utils.roundUp}.
 *
 * @param columnA name of the first column; must be numeric
 * @param columnB name of the second column; must be numeric
 * @return the rounded correlation value
 * @throws DDFException if either column is not numeric, or if the query fails or its result
 *     cannot be parsed as a double
 */
@Override
public double computeCorrelation(String columnA, String columnB) throws DDFException {
  // Reject the call when EITHER column is non-numeric. The earlier form,
  // !(a.isNumeric() || b.isNumeric()), only rejected it when both were non-numeric.
  if (!this.getDDF().getColumn(columnA).isNumeric()
      || !this.getDDF().getColumn(columnB).isNumeric()) {
    throw new DDFException("Only numeric fields are accepted!");
  }

  String sqlCmd = String.format("SELECT CORR(%s, %s) FROM %s", columnA, columnB,
      this.getDDF().getTableName());
  try {
    List<String> rs = this.getManager().sql(sqlCmd, false).getRows();
    return Utils.roundUp(Double.parseDouble(rs.get(0)));
  } catch (Exception e) {
    // Wrap any failure (query error, empty result, unparsable value) and keep the cause.
    throw new DDFException(String.format("Unable to get CORR(%s, %s) FROM %s", columnA, columnB,
        this.getDDF().getTableName()), e);
  }
}
if (ColumnType.isNumeric(this.getDDF().getColumn(columnName).getType())) { numericColumns.add(columnName);
/** * Transfer factor information from ddf to this DDF * @param ddf * @param columns Columns to re-compute factors * @throws DDFException */ public void copyFactor(DDF ddf, List<String> columns) throws DDFException { // if there is no columns to recompute factor info if (columns == null) { columns = new ArrayList<String>(); } for (Schema.Column col : ddf.getSchema().getColumns()) { if (this.getDDF().getColumn(col.getName()) != null && col.getColumnClass() == Schema.ColumnClass.FACTOR) { // Set corresponding column as factor this.getDDF().getSchemaHandler().setAsFactor(col.getName()); // if not in list of columns to re-compute factors // then we just copy existing factor info to the new ones if (!columns.contains(col.getName())) { // copy existing factor column info this.getDDF().getSchemaHandler().setFactorLevels(col.getName(), col.getOptionalFactor()); } } } this.getDDF().getSchemaHandler().computeFactorLevelsAndLevelCounts(); }