/**
 * Points Hive's MapReduce jobs at the configured queue, falling back to "default"
 * when no queue name is present in the job properties.
 */
private void setHiveQueueName() throws SQLException {
  String queueName = CompactionRunner.jobProperties.getProperty(HIVE_QUEUE_NAME, "default");
  this.conn.executeStatements("set " + MAPREDUCE_JOB_QUEUENAME + "=" + queueName);
}
/**
 * Switches the Hive session to the configured database ("default" when unset).
 */
private void setHiveDbName() throws SQLException {
  String dbName = CompactionRunner.jobProperties.getProperty(HIVE_DB_NAME, "default");
  this.conn.executeStatements("use " + dbName);
}
/**
 * Applies the configured minimum input split size to the Hive session.
 * No-op when the property is absent (Hive's own default then applies).
 */
private void setHiveInputSplitSize() throws SQLException {
  if (!CompactionRunner.jobProperties.containsKey(HIVE_INPUT_SPLIT_SIZE)) {
    return;
  }
  String splitSize = CompactionRunner.jobProperties.getProperty(HIVE_INPUT_SPLIT_SIZE);
  this.conn.executeStatements("set " + MAPRED_MIN_SPLIT_SIZE + "=" + splitSize);
}
/**
 * Applies the configured reducer count to the Hive session.
 * No-op when the property is absent (Hive's own default then applies).
 */
private void setNumberOfReducers() throws SQLException {
  if (!CompactionRunner.jobProperties.containsKey(MAPREDUCE_JOB_NUM_REDUCERS)) {
    return;
  }
  String numReducers = CompactionRunner.jobProperties.getProperty(MAPREDUCE_JOB_NUM_REDUCERS);
  this.conn.executeStatements("set " + MAPREDUCE_JOB_REDUCES + "=" + numReducers);
}
/**
 * Enables Hive auto-converted map joins, but only when the job both opts in
 * (HIVE_USE_MAPJOIN=true) and supplies a small-table size threshold; otherwise
 * the session is left untouched.
 */
private void setHiveMapjoin() throws SQLException {
  boolean mapjoinRequested =
      Boolean.parseBoolean(CompactionRunner.jobProperties.getProperty(HIVE_USE_MAPJOIN, "false"));
  if (mapjoinRequested && CompactionRunner.jobProperties.containsKey(HIVE_MAPJOIN_SMALLTABLE_FILESIZE)) {
    this.conn.executeStatements("set " + HIVE_AUTO_CONVERT_JOIN + "=true");
    this.conn.executeStatements("set " + HIVE_MAPJOIN_SMALLTABLE_FILESIZE + "="
        + CompactionRunner.jobProperties.getProperty(HIVE_MAPJOIN_SMALLTABLE_FILESIZE));
  }
}
/**
 * Executes the given Hive statements as one batch via the JDBC connector.
 * Silently returns on a null or empty list. Any SQLException is wrapped in an
 * unchecked RuntimeException (with cause preserved) since callers here cannot
 * recover from a failed Hive statement.
 *
 * @param queries Hive statements to run, in order; may be null or empty
 */
private void executeQueries(List<String> queries) {
  // isEmpty() is the idiomatic emptiness check; size() == 0 says the same thing less directly.
  if (queries == null || queries.isEmpty()) {
    return;
  }
  try {
    // toArray(new String[0]) is the preferred modern idiom; the JVM sizes the array itself.
    this.hiveJdbcConnector.executeStatements(queries.toArray(new String[0]));
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Drops this table's job-scoped Hive table (name suffixed with the job id).
 */
public void dropTable(HiveJdbcConnector conn, String jobId) throws SQLException {
  conn.executeStatements(String.format(DROP_TABLE_STMT, getNameWithJobId(jobId)));
}
/**
 * (Re)creates this table's job-scoped Hive table: drops any existing one, then
 * creates it with its data and schema locations resolved to fully-qualified HDFS URIs.
 */
@Override
public void createTable(HiveJdbcConnector conn, String jobID) throws SQLException {
  String tableName = getNameWithJobId(jobID);
  String hdfsUri = HdfsIO.getHdfsUri();
  conn.executeStatements(
      String.format(DROP_TABLE_STMT, tableName),
      String.format(CREATE_TABLE_STMT, tableName,
          hdfsUri + this.dataLocationInHdfs,
          hdfsUri + this.schemaLocationInHdfs));
}
/**
 * (Re)creates this table's job-scoped Hive table with an explicit table type
 * (e.g. "TABLE" or "EXTERNAL TABLE" — supplied by the caller), listing every
 * attribute as "name type" in a comma-separated column list.
 *
 * The generated DDL is byte-identical to the previous version; this revision only
 * fixes the builder usage: string concatenation inside StringBuilder.append()
 * allocated throwaway intermediate strings and defeated the point of the builder.
 *
 * @param conn      Hive JDBC connector used to run the DROP/CREATE statements
 * @param jobId     job id appended to the table name for isolation
 * @param tableType table-type keyword spliced between CREATE and the table name
 */
public void createTable(HiveJdbcConnector conn, String jobId, String tableType) throws SQLException {
  String tableName = getNameWithJobId(jobId);
  String dropTableStmt = String.format(DROP_TABLE_STMT, tableName);
  StringBuilder sb = new StringBuilder("CREATE ")
      .append(tableType).append(' ')
      .append(tableName).append('(');
  for (int i = 0; i < this.attributes.size(); i++) {
    // Separator-first: emit ", " before every column except the first.
    if (i > 0) {
      sb.append(", ");
    }
    sb.append(this.attributes.get(i).name()).append(' ').append(this.attributes.get(i).type());
  }
  sb.append(')');
  conn.executeStatements(dropTableStmt, sb.toString());
}
/**
 * Seeds the merged-delta table by copying every row of the first delta into it.
 */
private void insertFirstDeltaIntoMergedDelta(HiveManagedTable mergedDelta) throws SQLException {
  String targetTable = mergedDelta.getNameWithJobId(this.jobId);
  String sourceTable = this.deltas.get(0).getNameWithJobId(this.jobId);
  this.conn.executeStatements("INSERT OVERWRITE TABLE " + targetTable + " SELECT * FROM " + sourceTable);
}
@Override public void write(QueryBasedHiveConversionEntity hiveConversionEntity) throws IOException { List<String> conversionQueries = null; try { conversionQueries = hiveConversionEntity.getQueries(); EventWorkunitUtils.setBeginConversionDDLExecuteTimeMetadata(this.workUnit, System.currentTimeMillis()); this.hiveJdbcConnector.executeStatements(conversionQueries.toArray(new String[conversionQueries.size()])); // Adding properties for preserving partitionParams: addPropsForPublisher(hiveConversionEntity); EventWorkunitUtils.setEndConversionDDLExecuteTimeMetadata(this.workUnit, System.currentTimeMillis()); } catch (SQLException e) { log.warn("Failed to execute queries: "); for (String conversionQuery : conversionQueries) { log.warn("Conversion query attempted by Hive Query writer: " + conversionQuery); } throw new IOException(e); } }
// NOTE(review): fragment of a larger try block — the enclosing method's header and the
// try's closing/catch clauses are outside this chunk, so only what is visible is described.
// Visible behavior: when session-level Hive settings were configured, they are applied as a
// batch; then `query` runs with output compression and auto map-join conversion disabled
// (forced via two SET statements in the same batch), and the contents of hiveTempDir are
// listed into fileStatusList. Presumably the rest of the method filters `files` from that
// listing and moves/publishes the query's temp output — TODO confirm against the full method.
try { if (this.hiveSettings.size() > 0) { hiveJdbcConnector.executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()])); hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false","SET hive.auto.convert.join=false", query); FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir); List<FileStatus> files = new ArrayList<>();
/**
 * Evolves this table's schema to cover {@code table}: every attribute present in
 * {@code table} but missing from this table is appended via a single
 * "ALTER TABLE ... ADD COLUMNS (...)" statement against the job-suffixed table name.
 *
 * Side effect: each added attribute is also appended to {@code this.attributes},
 * keeping the in-memory schema in sync with the metastore change.
 * No-op (returns {@code this}, no SQL issued) when {@code hasNoNewColumn(table)}.
 *
 * NOTE(review): `randomSuffix` is passed where other call sites pass a job id —
 * presumably any unique suffix works for name isolation; confirm with callers.
 * The trailing extra '}' on this line closes the enclosing class.
 */
@Override public HiveTable addNewColumnsInSchema(HiveJdbcConnector conn, HiveTable table, String randomSuffix) throws SQLException { if (hasNoNewColumn(table)) { return this; } StringBuilder sb = new StringBuilder().append("ALTER TABLE " + this.getNameWithJobId(randomSuffix) + " ADD COLUMNS ("); boolean addComma = false; for (HiveAttribute attribute : table.attributes) { if (!this.attributes.contains(attribute)) { if (addComma) { sb.append(", "); } sb.append(attribute.name() + " " + attribute.type()); addComma = true; this.attributes.add(attribute); } } sb.append(')'); String alterTableStmt = sb.toString(); conn.executeStatements(alterTableStmt); return this; } }
/**
 * Materializes the anti-join of oldTable against newTable: rows of oldTable whose
 * key has no match in newTable are written into a fresh "not_updated" managed table
 * (same schema and primary keys as oldTable). oldTable is dropped afterwards.
 *
 * @return the populated "not_updated" table
 */
private HiveManagedTable getNotUpdatedRecords(HiveTable oldTable, HiveTable newTable) throws SQLException {
  String oldName = oldTable.getNameWithJobId(this.jobId);
  String newName = newTable.getNameWithJobId(this.jobId);
  LOG.info("Getting records in table " + oldName + " but not in table " + newName);

  HiveManagedTable notUpdated = new HiveManagedTable.Builder()
      .withName("not_updated")
      .withPrimaryKeys(oldTable.getPrimaryKeys())
      .withAttributes(oldTable.getAttributes())
      .build();
  notUpdated.createTable(this.conn, this.jobId);

  // LEFT OUTER JOIN + "new key IS NULL" selects exactly the old rows with no new counterpart.
  String leftOuterJoinStmt = "INSERT OVERWRITE TABLE " + notUpdated.getNameWithJobId(this.jobId)
      + " SELECT " + oldName + ".* FROM " + oldName
      + " LEFT OUTER JOIN " + newName
      + " ON " + getJoinCondition(oldTable, newTable)
      + " WHERE " + getKeyIsNullPredicate(newTable);
  this.conn.executeStatements(leftOuterJoinStmt);

  oldTable.dropTable(this.conn, this.jobId);
  return notUpdated;
}
/**
 * Produces the final compaction output: the UNION ALL of (a) snapshot records not
 * touched by any delta and (b) the fully merged delta, written into a new Avro
 * external table at {@code this.outputDataLocationInHdfs}.
 *
 * Steps, in order (order matters — schema evolution must precede the union so both
 * sides project the same columns):
 *   1. Evolve both input tables to the latest schema via addNewColumnsInSchema.
 *   2. Create the output external table (latest primary keys, latest schema location).
 *   3. INSERT OVERWRITE the output with SELECT-UNION ALL-SELECT over both inputs,
 *      projecting getAttributesInNewSchema() on each side.
 *   4. Drop both (schema-evolved) input tables.
 *
 * @return the populated output table
 */
private AvroExternalTable unionNotUpdatedRecordsAndDeltas(HiveManagedTable notUpdated, HiveTable mergedDelta) throws IOException, SQLException { LOG.info("Taking union of table " + notUpdated.getNameWithJobId(this.jobId) + "(records in snapshot but not in delta) and table " + mergedDelta.getNameWithJobId(this.jobId) + "(merged delta)"); HiveTable notUpdatedWithNewSchema = notUpdated.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId); HiveTable mergedDeltaWithNewSchema = mergedDelta.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId); AvroExternalTable outputTable = new AvroExternalTable.Builder().withName(this.outputTableName) .withPrimaryKeys(this.latestTable.getPrimaryKeys()) .withSchemaLocation(this.latestTable.getSchemaLocationInHdfs()).withDataLocation(this.outputDataLocationInHdfs) .build(); outputTable.createTable(this.conn, this.jobId); String unionStmt = "INSERT OVERWRITE TABLE " + outputTable.getNameWithJobId(this.jobId) + " SELECT " + getAttributesInNewSchema() + " FROM " + notUpdatedWithNewSchema.getNameWithJobId(this.jobId) + " UNION ALL " + "SELECT " + getAttributesInNewSchema() + " FROM " + mergedDeltaWithNewSchema.getNameWithJobId(this.jobId); this.conn.executeStatements(unionStmt); notUpdatedWithNewSchema.dropTable(this.conn, this.jobId); mergedDeltaWithNewSchema.dropTable(this.conn, this.jobId); return outputTable; }
/**
 * Folds nextDelta into mergedDelta: rows of mergedDelta not superseded by nextDelta
 * are unioned with all of nextDelta into a rebuilt merged-delta table (latest schema
 * and primary keys). nextDelta is dropped afterwards; getNotUpdatedRecords already
 * drops the previous mergedDelta.
 *
 * @return the rebuilt merged-delta table
 */
private HiveManagedTable mergeTwoDeltas(HiveManagedTable mergedDelta, AvroExternalTable nextDelta) throws SQLException {
  HiveManagedTable notUpdated = getNotUpdatedRecords(mergedDelta, nextDelta);
  HiveTable notUpdatedWithNewSchema = notUpdated.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);
  HiveTable nextDeltaWithNewSchema = nextDelta.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);

  // Rebuild the merged-delta table under the same name with the latest schema.
  mergedDelta = new HiveManagedTable.Builder()
      .withName(mergedDelta.getName())
      .withAttributes(this.latestTable.getAttributes())
      .withPrimaryKeys(this.latestTable.getPrimaryKeys())
      .build();
  mergedDelta.createTable(this.conn, this.jobId);

  String unionStmt = "INSERT OVERWRITE TABLE " + mergedDelta.getNameWithJobId(this.jobId)
      + " SELECT " + getAttributesInNewSchema() + " FROM " + notUpdatedWithNewSchema.getNameWithJobId(this.jobId)
      + " UNION ALL "
      + "SELECT " + getAttributesInNewSchema() + " FROM " + nextDeltaWithNewSchema.getNameWithJobId(this.jobId);
  this.conn.executeStatements(unionStmt);

  nextDelta.dropTable(this.conn, this.jobId);
  return mergedDelta;
}