/**
 * Executes a SQL command and returns a typed result. Occurrences of "@this" in the
 * command are rewritten to this DDF's table name before execution.
 *
 * @param sqlCommand SQL text, possibly containing the "@this" placeholder and a %s
 *        format specifier for the table name
 * @param errorMessage a format string (%s = table name) used in the failure message
 * @return the typed result from the manager
 * @throws DDFException wrapping any underlying failure
 */
public SqlTypedResult sqlTyped(String sqlCommand, String errorMessage) throws DDFException {
  try {
    String tableName = this.getTableName();
    String resolved = sqlCommand.replace("@this", tableName);
    return this.getManager().sqlTyped(String.format(resolved, tableName));
  } catch (Exception e) {
    throw new DDFException(String.format(errorMessage, this.getTableName()), e);
  }
}
/**
 * Builds a PostgreSQL query that computes a five-number summary for one column using
 * {@code percentile_disc(...) within group} window functions, then collapses the
 * window output with min() so a single row is returned.
 *
 * @param columnName the column to summarize
 * @param tableName the table (or subquery) to read from
 * @return the composed SQL query string
 */
private String buildPostgresFiveNumSql(String columnName, String tableName) {
  List<String> minPart = new ArrayList<String>();
  List<String> percentilePart = new ArrayList<String>();
  // Quantile order is intentional and matched by downstream consumers of the q0..q4
  // aliases: q0 = min (0), q1 = max (1), q2/q3/q4 = lower quartile, median, upper quartile.
  String[] quantiles = {"0", "1", "0.25", "0.5", "0.75"};
  for (int i = 0; i < quantiles.length; i++) {
    String column = columnName + "_q" + i;
    minPart.add(String.format("min(%s) as %s", column, column));
    percentilePart.add(String.format(
        "percentile_disc(%s) within group (order by %s) over() as %s",
        quantiles[i], columnName, column));
  }
  String sql = "select %s from \n" + "(select %s from (%s) TMP_FIVENUM)";
  // FIX: use the tableName parameter. It was previously ignored in favor of
  // this.getDDF().getTableName(), leaving the parameter dead (the caller passes the
  // same value, so behavior is unchanged for existing call sites).
  return String.format(sql,
      Joiner.on(",").join(minPart),
      Joiner.on(",").join(percentilePart),
      tableName);
}
public SqlResult sql(String sqlCommand, String errorMessage) throws DDFException { try { // sqlCommand = sqlCommand.replace("@this", this.getTableName()); // TODO: what is format? // return this.getManager().sql(String.format(sqlCommand, this.getTableName())); sqlCommand = sqlCommand.replace("@this", "{1}"); sqlCommand = String.format(sqlCommand, "{1}"); SQLDataSourceDescriptor sqlDS = new SQLDataSourceDescriptor(sqlCommand, null, null,null, this.getUUID().toString()); return this.getManager().sql(sqlCommand, null, sqlDS); } catch (Exception e) { throw new DDFException(String.format(errorMessage, this.getTableName()), e); } }
public DDF sql2ddf(String sqlCommand) throws DDFException { try { // sqlCommand = sqlCommand.replace("@this", this.getTableName()); sqlCommand = sqlCommand.replace("@this", "{1}"); sqlCommand = String.format(sqlCommand, "{1}"); SQLDataSourceDescriptor sqlDS = new SQLDataSourceDescriptor(sqlCommand, null, null,null, this.getUUID().toString()); return this.getManager().sql2ddf(sqlCommand, null, sqlDS); // return this.getManager().sql2ddf(sqlCommand); } catch (Exception e) { throw new DDFException(String.format("Error executing queries for ddf %s", this.getTableName()), e); } }
/**
 * Applies an aggregate function to a single column and returns the scalar result,
 * parsed from the first row of the query output.
 *
 * @param function the aggregate to apply (rendered via function.toString(column))
 * @param column the column to aggregate over
 * @return the aggregate value as a double
 * @throws DDFException if the query fails
 */
@Override
public double aggregateOnColumn(AggregateFunction function, String column) throws DDFException {
  String query = String.format("SELECT %s from %s", function.toString(column), this.getDDF().getTableName());
  List<String> rows = this.getManager().sql(query, false).getRows();
  return Double.parseDouble(rows.get(0));
}
/**
 * Materializes a subset of this DDF as a new DDF by issuing a SELECT over the
 * requested columns, optionally restricted by a filter expression.
 *
 * @param columnExpr the columns to project
 * @param filter an optional WHERE-clause expression (may be null)
 * @return the subset as a new DDF
 * @throws DDFException if the query fails
 */
protected DDF _subset(List<Column> columnExpr, Expression filter) throws DDFException {
  // Rewrite vector references in the filter and each column to this DDF's names.
  updateVectorName(filter, this.getDDF());
  mLog.info("Updated filter: " + filter);
  int numColumns = columnExpr.size();
  String[] colNames = new String[numColumns];
  for (int i = 0; i < numColumns; i++) {
    updateVectorName(columnExpr.get(i), this.getDDF());
    colNames[i] = columnExpr.get(i).getName();
  }
  mLog.info("Updated columns: " + Arrays.toString(columnExpr.toArray()));
  String sqlCmd = String.format("SELECT %s FROM %s",
      Joiner.on(", ").join(colNames), this.getDDF().getTableName());
  if (filter != null) {
    sqlCmd += String.format(" WHERE %s", filter.toSql());
  }
  mLog.info("sql = {}", sqlCmd);
  return this.getDDF().getSqlHandler().sql2ddf(sqlCmd);
}
/**
 * Runs a SQL command (with "{1}" substituted for its format placeholder) and returns
 * the result as a new DDF.
 *
 * @param sqlCommand SQL text containing a %s placeholder; note the descriptor receives
 *        the UNformatted command while the manager receives the formatted one
 * @param errorMessage a format string (%s = table name) used in the failure message
 * @return the query result as a new DDF
 * @throws DDFException wrapping any underlying failure
 */
private DDF sql2ddf(String sqlCommand, String errorMessage) throws DDFException {
  try {
    String formatted = String.format(sqlCommand, "{1}");
    SQLDataSourceDescriptor dataSource = new SQLDataSourceDescriptor(
        sqlCommand, null, null, null, this.getDDF().getUUID().toString());
    return this.getManager().sql2ddf(formatted, dataSource);
  } catch (Exception e) {
    throw new DDFException(String.format(errorMessage, this.getDDF().getTableName()), e);
  }
}
List<String> byRightColumns) throws DDFException { String leftTableName = getDDF().getTableName(); String rightTableName = anotherDDF.getTableName(); List<String> rightColumns = anotherDDF.getColumnNames(); List<String> leftColumns = getDDF().getColumnNames();
@Override public DDF agg(List<String> aggregateFunctions) throws DDFException { if (mGroupedColumns.size() > 0) { // String tableName = this.getDDF().getTableName(); String groupedColSql = Joiner.on(",").join(mGroupedColumns); String selectFuncSql = convertAggregateFunctionsToSql(aggregateFunctions.get(0)); for (int i = 1; i < aggregateFunctions.size(); i++) { selectFuncSql += "," + convertAggregateFunctionsToSql(aggregateFunctions.get(i)); } String sqlCmd = String.format("SELECT %s , %s FROM %s GROUP BY %s", selectFuncSql, groupedColSql, "{1}", groupedColSql); mLog.info("SQL Command: " + sqlCmd); try { DDF resultDDF = this.getManager().sql2ddf(sqlCmd, new SQLDataSourceDescriptor(sqlCmd, true, null,null, this.getDDF().getUUID().toString())); return resultDDF; } catch (Exception e) { e.printStackTrace(); throw new DDFException("Unable to query from " + this.getDDF().getTableName(), e); } } else { throw new DDFException("Need to set grouped columns before aggregation"); } }
/**
 * Computes the Pearson correlation (SQL CORR) between two columns.
 *
 * @param columnA first column name; must be numeric
 * @param columnB second column name; must be numeric
 * @return the correlation, rounded via Utils.roundUp
 * @throws DDFException if either column is non-numeric, or the query fails
 */
@Override
public double computeCorrelation(String columnA, String columnB) throws DDFException {
  // FIX: require BOTH columns to be numeric. The previous check used ||, which only
  // rejected the case where both columns were non-numeric and let mixed pairs through.
  if (!(this.getDDF().getColumn(columnA).isNumeric() && this.getDDF().getColumn(columnB).isNumeric())) {
    throw new DDFException("Only numeric fields are accepted!");
  }
  String sqlCmd = String.format("SELECT CORR(%s, %s) FROM %s",
      columnA, columnB, this.getDDF().getTableName());
  try {
    List<String> rs = this.getManager().sql(sqlCmd, false).getRows();
    return Utils.roundUp(Double.parseDouble(rs.get(0)));
  } catch (Exception e) {
    throw new DDFException(String.format("Unable to get CORR(%s, %s) FROM %s",
        columnA, columnB, this.getDDF().getTableName()), e);
  }
}
@Override public DDF transformScaleMinMax() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()).append(" "); } else { // subtract min, divide by (max - min) sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].min(), (summaryArr[i].max() - summaryArr[i].min()), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
@Override public DDF transformScaleStandard() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()); } else { // subtract mean, divide by stdev sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].mean(), summaryArr[i].stdev(), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
/**
 * Performs the equivalent of a SQL aggregation statement like "SELECT year, month, AVG(depdelay),
 * MIN(arrdelay) FROM airline GROUP BY year, month"
 *
 * @param fields {@link AggregateField}s representing a list of column specs, some of which may be
 *        aggregated, while other non-aggregated fields are the GROUP BY keys
 * @return the parsed aggregation result
 * @throws DDFException if the query fails
 */
@Override
public AggregationResult aggregate(List<AggregateField> fields) throws DDFException {
  String tableName = this.getDDF().getTableName();
  String sqlCmd = AggregateField.toSql(fields, tableName);
  mLog.info("SQL Command: " + sqlCmd);
  // Non-aggregated fields are the GROUP BY keys; their count tells the result
  // parser how to split each returned row.
  int numUnaggregatedFields = 0;
  for (AggregateField field : fields) {
    if (!field.isAggregated()) numUnaggregatedFields++;
  }
  try {
    List<String> result = this.getManager().sql(sqlCmd, false).getRows();
    return AggregationResult.newInstance(result, numUnaggregatedFields);
  } catch (Exception e) {
    // FIX: log the full exception (with stack trace) instead of printStackTrace()
    // plus a message-only mLog.error; the cause is also preserved in the DDFException.
    mLog.error("Unable to query from " + tableName, e);
    throw new DDFException("Unable to query from " + tableName, e);
  }
}
@Override public DDF copyFrom(DDF ddf, String tgtname) throws DDFException { mLog.info(String.format(">>> Copy new ddf %s from ddf %s", tgtname, ddf.getName())); DDFManager fromManager = ddf.getManager(); DataSourceDescriptor dataSourceDescriptor = fromManager.getDataSourceDescriptor(); if (dataSourceDescriptor instanceof JDBCDataSourceDescriptor) { // It's a jdbc ddf. JDBCDataSourceDescriptor jdbcDS = (JDBCDataSourceDescriptor) dataSourceDescriptor; JDBCDataSourceDescriptor loadDS = new JDBCDataSourceDescriptor(jdbcDS.getDataSourceUri(), jdbcDS.getCredentials(), ddf.getTableName()); DDF tgtddf = this.load(loadDS); this.setDDFName(tgtddf, tgtname); return tgtddf; } else { throw new DDFException("Unsupported operation in copyFrom"); } }
HiveContext sqlContext = ((SparkDDFManager) this.getDDF().getManager()).getHiveContext(); for(Column column: categoricalColumns) { String sqlCmd = String.format("select distinct(%s) from %s where %s is not null", column.getName(), this.getDDF().getTableName(), column.getName()); DataFrame sqlresult = sqlContext.sql(sqlCmd); Row[] rows = sqlresult.collect(); sql = String.format("select %s from %s", sql, this.getDDF().getTableName()); DataFrame sqlResult = sqlContext.sql(sql); Row[] rows = sqlResult.collect();
String sql = buildPostgresFiveNumSql(column, this.getDDF().getTableName());
/** * This function fills NA with given values. Default using a scalar value fillNA(value,null, 0, null, null, null, * false) * * @param value a scalar value to fill all NAs * @param method = 'ffill' for forward fill or 'bfill' for backward fill * @param limit = maximum size gap for forward or backward fill * @param function aggregate function to generate the filled value for a column * @param columnsToValues = a map to provide different values to fill for different columns * @param columns = only consider NA filling on the given columns, set to null for all columns of the DDF * @return a DDF with NAs filled */ @Override public DDF fillNA(String value, FillMethod method, long limit, AggregateFunction function, Map<String, String> columnsToValues, List<String> columns) throws DDFException { DDF newddf = null; if (columns == null) { columns = this.getDDF().getColumnNames(); } if (method == null) { String sqlCmd = fillNAWithValueSQL(value, function, columnsToValues, columns); mLog.info("FillNA sql command: " + sqlCmd); newddf = this.getManager().sql2ddf(String.format(sqlCmd, this.getDDF().getTableName()), false); } else { // interpolation methods 'ffill' or 'bfill' // TODO: } newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
tableName = mViewMapping.get(ddf.getName()); } else if (table.getAlias() != null) { tableName = "(" + ddf.getTableName() + ")"; } else { tableName = "(" + ddf.getTableName() + ") " + this.genTableName(8); table.setName(ddf.getTableName());
newddf = this.getManager().sql2ddf(String.format(sqlCmd, this.getDDF().getTableName()), false);