private void setHiveQueueName() throws SQLException {
  this.conn.executeStatements("set " + MAPREDUCE_JOB_QUEUENAME + "="
      + CompactionRunner.jobProperties.getProperty(HIVE_QUEUE_NAME, "default"));
}

private void setHiveDbName() throws SQLException {
  this.conn.executeStatements("use " + CompactionRunner.jobProperties.getProperty(HIVE_DB_NAME, "default"));
}

private void setNumberOfReducers() throws SQLException {
  boolean numOfReducersSpecified = CompactionRunner.jobProperties.containsKey(MAPREDUCE_JOB_NUM_REDUCERS);
  if (numOfReducersSpecified) {
    this.conn.executeStatements("set " + MAPREDUCE_JOB_REDUCES + "="
        + CompactionRunner.jobProperties.getProperty(MAPREDUCE_JOB_NUM_REDUCERS));
  }
}

private void setHiveMapjoin() throws SQLException {
  boolean useMapjoin = Boolean.parseBoolean(CompactionRunner.jobProperties.getProperty(HIVE_USE_MAPJOIN, "false"));
  boolean smallTableSizeSpecified = CompactionRunner.jobProperties.containsKey(HIVE_MAPJOIN_SMALLTABLE_FILESIZE);
  if (useMapjoin && smallTableSizeSpecified) {
    this.conn.executeStatements("set " + HIVE_AUTO_CONVERT_JOIN + "=true");
    this.conn.executeStatements("set " + HIVE_MAPJOIN_SMALLTABLE_FILESIZE + "="
        + CompactionRunner.jobProperties.getProperty(HIVE_MAPJOIN_SMALLTABLE_FILESIZE));
  }
}

private void setHiveInputSplitSize() throws SQLException {
  boolean splitSizeSpecified = CompactionRunner.jobProperties.containsKey(HIVE_INPUT_SPLIT_SIZE);
  if (splitSizeSpecified) {
    this.conn.executeStatements("set " + MAPRED_MIN_SPLIT_SIZE + "="
        + CompactionRunner.jobProperties.getProperty(HIVE_INPUT_SPLIT_SIZE));
  }
}

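// The five setters above all follow the same pattern: issue a "set key=value" (or "use db")
// statement so the property applies only to the current Hive session. Below is a minimal
// sketch of that pattern over a plain JDBC connection, standing in for HiveJdbcConnector;
// the URL and the property values are hypothetical, not taken from the source.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveSessionSettingsSketch {
  public static void main(String[] args) throws SQLException {
    try (Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      stmt.execute("set mapreduce.job.queuename=default");  // job queue, as in setHiveQueueName()
      stmt.execute("set mapreduce.job.reduces=10");         // reducer count, as in setNumberOfReducers()
      stmt.execute("set mapred.min.split.size=268435456");  // input split size, as in setHiveInputSplitSize()
      stmt.execute("use default");                          // target database, as in setHiveDbName()
    }
  }
}
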
private void executeQueries(List<String> queries) {
  if (queries == null || queries.isEmpty()) {
    return;
  }
  try {
    this.hiveJdbcConnector.executeStatements(queries.toArray(new String[queries.size()]));
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }
}

@Override
public void run() {
  try {
    List<String> queries = generateHiveQueries();
    // Register auxiliary files and jars with the Hive session, then run any setup
    // statements before the generated queries themselves.
    this.hiveJdbcConnector.executeStatements(
        Lists.transform(this.addFiles, file -> "ADD FILE " + file).toArray(new String[]{}));
    this.hiveJdbcConnector.executeStatements(
        Lists.transform(this.addJars, file -> "ADD JAR " + file).toArray(new String[]{}));
    this.hiveJdbcConnector.executeStatements(this.setupQueries.toArray(new String[]{}));
    this.hiveJdbcConnector.executeStatements(queries.toArray(new String[queries.size()]));
    super.run();
  } catch (Exception e) {
    this.workingState = WorkUnitState.WorkingState.FAILED;
    log.error("Exception while generating or executing HiveTask queries", e);
  }
}

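// A minimal sketch (not from the source) of the ADD FILE / ADD JAR statement construction
// done in run() above, using JDK streams instead of Guava's Lists.transform; the resource
// paths are hypothetical.
import java.util.Arrays;
import java.util.List;

public class AddResourceStatementsSketch {
  public static void main(String[] args) {
    List<String> addFiles = Arrays.asList("/tmp/udf-config.xml");
    List<String> addJars = Arrays.asList("/tmp/my-udfs.jar");
    String[] fileStmts = addFiles.stream().map(f -> "ADD FILE " + f).toArray(String[]::new);
    String[] jarStmts = addJars.stream().map(j -> "ADD JAR " + j).toArray(String[]::new);
    System.out.println(String.join("\n", fileStmts));  // ADD FILE /tmp/udf-config.xml
    System.out.println(String.join("\n", jarStmts));   // ADD JAR /tmp/my-udfs.jar
  }
}
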
public void dropTable(HiveJdbcConnector conn, String jobId) throws SQLException {
  String dropTableStmt = String.format(DROP_TABLE_STMT, getNameWithJobId(jobId));
  conn.executeStatements(dropTableStmt);
}

@Override
public void createTable(HiveJdbcConnector conn, String jobId) throws SQLException {
  String tableName = getNameWithJobId(jobId);
  String dropTableStmt = String.format(DROP_TABLE_STMT, tableName);
  String hdfsUri = HdfsIO.getHdfsUri();
  String createTableStmt = String.format(CREATE_TABLE_STMT, tableName,
      hdfsUri + this.dataLocationInHdfs, hdfsUri + this.schemaLocationInHdfs);
  conn.executeStatements(dropTableStmt, createTableStmt);
}

public void createTable(HiveJdbcConnector conn, String jobId, String tableType) throws SQLException {
  String tableName = getNameWithJobId(jobId);
  String dropTableStmt = String.format(DROP_TABLE_STMT, tableName);
  StringBuilder sb = new StringBuilder("CREATE ").append(tableType).append(' ').append(tableName).append('(');
  for (int i = 0; i < this.attributes.size(); i++) {
    sb.append(this.attributes.get(i).name()).append(' ').append(this.attributes.get(i).type());
    if (i != this.attributes.size() - 1) {
      sb.append(", ");
    }
  }
  sb.append(')');
  String createTableStmt = sb.toString();
  conn.executeStatements(dropTableStmt, createTableStmt);
}

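// A minimal, self-contained sketch of the CREATE statement that createTable(conn, jobId,
// tableType) assembles from the table's attributes; the table and column names here are
// hypothetical stand-ins for HiveAttribute values.
import java.util.LinkedHashMap;
import java.util.Map;

public class CreateTableStatementSketch {
  public static void main(String[] args) {
    Map<String, String> attributes = new LinkedHashMap<>();
    attributes.put("id", "string");
    attributes.put("name", "string");
    StringBuilder sb = new StringBuilder("CREATE TABLE not_updated_job123(");
    int i = 0;
    for (Map.Entry<String, String> attr : attributes.entrySet()) {
      if (i++ > 0) {
        sb.append(", ");
      }
      sb.append(attr.getKey()).append(' ').append(attr.getValue());
    }
    sb.append(')');
    // Prints: CREATE TABLE not_updated_job123(id string, name string)
    System.out.println(sb);
  }
}
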
private void insertFirstDeltaIntoMergedDelta(HiveManagedTable mergedDelta) throws SQLException {
  String insertStmt = "INSERT OVERWRITE TABLE " + mergedDelta.getNameWithJobId(this.jobId)
      + " SELECT * FROM " + this.deltas.get(0).getNameWithJobId(this.jobId);
  this.conn.executeStatements(insertStmt);
}

try {
  if (this.hiveSettings.size() > 0) {
    hiveJdbcConnector.executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
    hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
        "SET hive.auto.convert.join=false", query);
    FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
    List<FileStatus> files = new ArrayList<>();

@Override
public HiveTable addNewColumnsInSchema(HiveJdbcConnector conn, HiveTable table, String randomSuffix)
    throws SQLException {
  if (hasNoNewColumn(table)) {
    return this;
  }
  StringBuilder sb = new StringBuilder("ALTER TABLE ").append(this.getNameWithJobId(randomSuffix))
      .append(" ADD COLUMNS (");
  boolean addComma = false;
  for (HiveAttribute attribute : table.attributes) {
    if (!this.attributes.contains(attribute)) {
      if (addComma) {
        sb.append(", ");
      }
      sb.append(attribute.name()).append(' ').append(attribute.type());
      addComma = true;
      this.attributes.add(attribute);
    }
  }
  sb.append(')');
  String alterTableStmt = sb.toString();
  conn.executeStatements(alterTableStmt);
  return this;
}

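// A minimal sketch of the ALTER TABLE statement addNewColumnsInSchema builds when the
// other table carries columns this table lacks; the table name and columns are hypothetical,
// and plain "name type" strings stand in for HiveAttribute objects.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class AddColumnsStatementSketch {
  public static void main(String[] args) {
    List<String> existing = new ArrayList<>(Arrays.asList("id string", "name string"));
    List<String> latest = Arrays.asList("id string", "name string", "email string", "age int");
    StringBuilder sb = new StringBuilder("ALTER TABLE snapshot_job123 ADD COLUMNS (");
    boolean addComma = false;
    for (String column : latest) {
      if (!existing.contains(column)) {
        if (addComma) {
          sb.append(", ");
        }
        sb.append(column);
        addComma = true;
        existing.add(column);  // the schema is widened in place, as in the method above
      }
    }
    sb.append(')');
    // Prints: ALTER TABLE snapshot_job123 ADD COLUMNS (email string, age int)
    System.out.println(sb);
  }
}
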
@Override
public void write(QueryBasedHiveConversionEntity hiveConversionEntity) throws IOException {
  List<String> conversionQueries = null;
  try {
    conversionQueries = hiveConversionEntity.getQueries();
    EventWorkunitUtils.setBeginConversionDDLExecuteTimeMetadata(this.workUnit, System.currentTimeMillis());
    this.hiveJdbcConnector.executeStatements(conversionQueries.toArray(new String[conversionQueries.size()]));
    // Adding properties for preserving partitionParams:
    addPropsForPublisher(hiveConversionEntity);
    EventWorkunitUtils.setEndConversionDDLExecuteTimeMetadata(this.workUnit, System.currentTimeMillis());
  } catch (SQLException e) {
    StringBuilder sb = new StringBuilder();
    sb.append(String.format("Failed to execute queries for %s: ",
        hiveConversionEntity.getPartition().isPresent()
            ? hiveConversionEntity.getPartition().get().getCompleteName()
            : hiveConversionEntity.getTable().getCompleteName()));
    for (String conversionQuery : conversionQueries) {
      sb.append("\nConversion query attempted by Hive Query writer: ");
      sb.append(conversionQuery);
    }
    String message = sb.toString();
    log.warn(message);
    throw new IOException(message, e);
  }
}

  this.hiveJdbcConnector.executeStatements(publishQueries.toArray(new String[publishQueries.size()]));
} finally {
  try {
    this.hiveJdbcConnector.executeStatements(cleanUpQueries.toArray(new String[cleanUpQueries.size()]));
    HadoopUtils.deleteDirectories(fs, directoriesToDelete, true, true);
  } catch (RuntimeException re) {

@BeforeClass
public void setup() throws Exception {
  this.jdbcConnector = HiveJdbcConnector.newEmbeddedConnector(2);
  this.dataFile = new File(getClass().getClassLoader().getResource("hiveMaterializerTest/source/").toURI());
  this.localHiveMetastore.dropDatabaseIfExists(this.dbName);
  this.localHiveMetastore.createTestDb(this.dbName);
  this.jdbcConnector.executeStatements(
      String.format("CREATE EXTERNAL TABLE %s.%s (id STRING, name STRING) PARTITIONED BY (%s STRING) "
              + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE",
          this.dbName, this.sourceTableName, this.partitionColumn),
      String.format("ALTER TABLE %s.%s ADD PARTITION (part = 'part1') LOCATION '%s'",
          this.dbName, this.sourceTableName, this.dataFile.getAbsolutePath() + "/part1"),
      String.format("ALTER TABLE %s.%s ADD PARTITION (part = 'part2') LOCATION '%s'",
          this.dbName, this.sourceTableName, this.dataFile.getAbsolutePath() + "/part2"));
  List<List<String>> allTable = executeStatementAndGetResults(this.jdbcConnector,
      String.format("SELECT * FROM %s.%s", this.dbName, this.sourceTableName), 3);
  Assert.assertEquals(allTable.size(), 8);
  List<List<String>> part1 = executeStatementAndGetResults(this.jdbcConnector,
      String.format("SELECT * FROM %s.%s WHERE %s='part1'", this.dbName, this.sourceTableName, this.partitionColumn), 3);
  Assert.assertEquals(part1.size(), 4);
  this.pool = HiveMetastoreClientPool.get(new Properties(), Optional.absent());
  Table table;
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    table = new Table(client.get().getTable(this.dbName, this.sourceTableName));
  }
  this.dataset = new HiveDataset(FileSystem.getLocal(new Configuration()), pool, table, new Properties());
}

@Test
public void testMaterializeView() throws Exception {
  String destinationTable = "materializeView";
  File tmpDir = Files.createTempDir();
  tmpDir.deleteOnExit();
  String viewName = "myView";
  this.jdbcConnector.executeStatements(String.format(
      "CREATE VIEW %s.%s AS SELECT * FROM %s.%s WHERE name = 'foo'",
      this.dbName, viewName, this.dbName, this.sourceTableName));
  Table view;
  try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
    view = new Table(client.get().getTable(this.dbName, viewName));
  }
  HiveDataset viewDataset = new HiveDataset(FileSystem.getLocal(new Configuration()), pool, view, new Properties());
  WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(viewDataset,
      HiveConverterUtils.StorageFormat.AVRO,
      new TableLikeStageableTableMetadata(viewDataset.getTable(), this.dbName, destinationTable, tmpDir.getAbsolutePath()),
      null);
  HiveMaterializer hiveMaterializer = new HiveMaterializer(getTaskContextForRun(workUnit));
  hiveMaterializer.run();
  Assert.assertEquals(hiveMaterializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  hiveMaterializer.commit();
  Assert.assertEquals(hiveMaterializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  List<List<String>> allTable = executeStatementAndGetResults(this.jdbcConnector,
      String.format("SELECT * FROM %s.%s", this.dbName, destinationTable), 3);
  Assert.assertEquals(allTable.size(), 4);
  Assert.assertEquals(allTable.stream().map(l -> l.get(0)).collect(Collectors.toList()),
      Lists.newArrayList("101", "103", "201", "203"));
}

private HiveManagedTable getNotUpdatedRecords(HiveTable oldTable, HiveTable newTable) throws SQLException {
  LOG.info("Getting records in table " + oldTable.getNameWithJobId(this.jobId)
      + " but not in table " + newTable.getNameWithJobId(this.jobId));
  HiveManagedTable notUpdated = new HiveManagedTable.Builder().withName("not_updated")
      .withPrimaryKeys(oldTable.getPrimaryKeys()).withAttributes(oldTable.getAttributes()).build();
  notUpdated.createTable(this.conn, this.jobId);
  String leftOuterJoinStmt = "INSERT OVERWRITE TABLE " + notUpdated.getNameWithJobId(this.jobId)
      + " SELECT " + oldTable.getNameWithJobId(this.jobId) + ".* FROM " + oldTable.getNameWithJobId(this.jobId)
      + " LEFT OUTER JOIN " + newTable.getNameWithJobId(this.jobId)
      + " ON " + getJoinCondition(oldTable, newTable)
      + " WHERE " + getKeyIsNullPredicate(newTable);
  this.conn.executeStatements(leftOuterJoinStmt);
  oldTable.dropTable(this.conn, this.jobId);
  return notUpdated;
}

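// A minimal sketch of the anti-join statement getNotUpdatedRecords issues, for hypothetical
// table names and a single primary key "id": rows in the old table whose key finds no match
// in the new table survive the LEFT OUTER JOIN with a NULL key, i.e. the not-updated records.
public class NotUpdatedRecordsStatementSketch {
  public static void main(String[] args) {
    String oldTable = "merged_delta_job123";
    String newTable = "delta_2_job123";
    String stmt = "INSERT OVERWRITE TABLE not_updated_job123"
        + " SELECT " + oldTable + ".* FROM " + oldTable
        + " LEFT OUTER JOIN " + newTable
        + " ON " + oldTable + ".id = " + newTable + ".id"
        + " WHERE " + newTable + ".id IS NULL";
    System.out.println(stmt);
  }
}
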
private AvroExternalTable unionNotUpdatedRecordsAndDeltas(HiveManagedTable notUpdated, HiveTable mergedDelta)
    throws IOException, SQLException {
  LOG.info("Taking union of table " + notUpdated.getNameWithJobId(this.jobId)
      + " (records in snapshot but not in delta) and table " + mergedDelta.getNameWithJobId(this.jobId)
      + " (merged delta)");
  HiveTable notUpdatedWithNewSchema = notUpdated.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);
  HiveTable mergedDeltaWithNewSchema = mergedDelta.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);
  AvroExternalTable outputTable = new AvroExternalTable.Builder().withName(this.outputTableName)
      .withPrimaryKeys(this.latestTable.getPrimaryKeys())
      .withSchemaLocation(this.latestTable.getSchemaLocationInHdfs())
      .withDataLocation(this.outputDataLocationInHdfs)
      .build();
  outputTable.createTable(this.conn, this.jobId);
  String unionStmt = "INSERT OVERWRITE TABLE " + outputTable.getNameWithJobId(this.jobId)
      + " SELECT " + getAttributesInNewSchema() + " FROM " + notUpdatedWithNewSchema.getNameWithJobId(this.jobId)
      + " UNION ALL"
      + " SELECT " + getAttributesInNewSchema() + " FROM " + mergedDeltaWithNewSchema.getNameWithJobId(this.jobId);
  this.conn.executeStatements(unionStmt);
  notUpdatedWithNewSchema.dropTable(this.conn, this.jobId);
  mergedDeltaWithNewSchema.dropTable(this.conn, this.jobId);
  return outputTable;
}

private HiveManagedTable mergeTwoDeltas(HiveManagedTable mergedDelta, AvroExternalTable nextDelta)
    throws SQLException {
  HiveManagedTable notUpdated = getNotUpdatedRecords(mergedDelta, nextDelta);
  HiveTable notUpdatedWithNewSchema = notUpdated.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);
  HiveTable nextDeltaWithNewSchema = nextDelta.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);
  mergedDelta = new HiveManagedTable.Builder().withName(mergedDelta.getName())
      .withAttributes(this.latestTable.getAttributes())
      .withPrimaryKeys(this.latestTable.getPrimaryKeys())
      .build();
  mergedDelta.createTable(this.conn, this.jobId);
  String unionStmt = "INSERT OVERWRITE TABLE " + mergedDelta.getNameWithJobId(this.jobId)
      + " SELECT " + getAttributesInNewSchema() + " FROM " + notUpdatedWithNewSchema.getNameWithJobId(this.jobId)
      + " UNION ALL"
      + " SELECT " + getAttributesInNewSchema() + " FROM " + nextDeltaWithNewSchema.getNameWithJobId(this.jobId);
  this.conn.executeStatements(unionStmt);
  nextDelta.dropTable(this.conn, this.jobId);
  return mergedDelta;
}

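// A minimal sketch (hypothetical table names and attribute list) of the UNION ALL statement
// that both unionNotUpdatedRecordsAndDeltas and mergeTwoDeltas assemble: the not-updated
// rows and the newer rows are written into the target table in a single insert.
public class UnionStatementSketch {
  public static void main(String[] args) {
    String attrs = "id, name, email";
    String stmt = "INSERT OVERWRITE TABLE merged_delta_job123"
        + " SELECT " + attrs + " FROM not_updated_job123"
        + " UNION ALL SELECT " + attrs + " FROM delta_2_job123";
    System.out.println(stmt);
  }
}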