/**
 * Aligns this table's schema with {@code table} by adding any columns this table is missing.
 *
 * <p>If no new columns are needed, returns this table unchanged. Otherwise the work is
 * delegated to a {@link HiveManagedTable} built from this table's name, primary keys and
 * attributes, so the ALTER-TABLE logic lives in a single place.
 *
 * @param conn  live Hive JDBC connection used to execute the schema-change DDL
 * @param table the reference table whose columns should all exist in this table
 * @param jobId job identifier used to derive the job-scoped table name
 * @return a table whose schema contains all columns of {@code table}
 * @throws SQLException if executing the schema-change statements fails
 */
@Override
public HiveTable addNewColumnsInSchema(HiveJdbcConnector conn, HiveTable table, String jobId) throws SQLException {
  if (hasNoNewColumn(table)) {
    return this;
  }
  HiveManagedTable managedTable = new HiveManagedTable.Builder().withName(this.name).withPrimaryKeys(this.primaryKeys)
      .withAttributes(this.attributes).build();
  // Bug fix: forward the caller's connection instead of null. The delegate runs DDL
  // through this connection, so passing null would fail at the first statement.
  return managedTable.addNewColumnsInSchema(conn, table, jobId);
}
/**
 * Creates every table this compaction run needs: the snapshot table first,
 * then each delta table, all scoped by the current job id.
 *
 * @throws SQLException if any CREATE TABLE statement fails
 */
private void createTables() throws SQLException {
  this.snapshot.createTable(this.conn, this.jobId);
  for (AvroExternalTable deltaTable : this.deltas) {
    deltaTable.createTable(this.conn, this.jobId);
  }
}
/**
 * Asks the snapshot table and every delta table to remove their temporary
 * HDFS files, honoring each table's own cleanup flags.
 *
 * @throws IOException if deleting a file from HDFS fails
 */
private void deleteTmpFiles() throws IllegalArgumentException, IOException {
  this.snapshot.deleteTmpFilesIfNeeded();
  for (AvroExternalTable deltaTable : this.deltas) {
    deltaTable.deleteTmpFilesIfNeeded();
  }
}
/**
 * Merges the next delta into the running merged delta.
 *
 * <p>Steps: (1) compute the merged-delta rows NOT overridden by the next delta,
 * (2) upgrade both inputs to the latest schema, (3) rebuild the merged-delta
 * table with the latest schema, (4) union both inputs into it, (5) drop the
 * consumed next-delta table.
 *
 * @param mergedDelta accumulated merge result so far
 * @param nextDelta   the delta table to fold in
 * @return the rebuilt merged-delta table containing the union
 * @throws SQLException if any Hive statement fails
 */
private HiveManagedTable mergeTwoDeltas(HiveManagedTable mergedDelta, AvroExternalTable nextDelta) throws SQLException {
  // Rows of the current merged delta that the next delta does not supersede.
  HiveManagedTable notUpdated = getNotUpdatedRecords(mergedDelta, nextDelta);
  HiveTable notUpdatedWithNewSchema = notUpdated.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);
  HiveTable nextDeltaWithNewSchema = nextDelta.addNewColumnsInSchema(this.conn, this.latestTable, this.jobId);

  // Recreate the merged delta using the latest table's schema before unioning.
  mergedDelta = new HiveManagedTable.Builder().withName(mergedDelta.getName())
      .withAttributes(this.latestTable.getAttributes()).withPrimaryKeys(this.latestTable.getPrimaryKeys()).build();
  mergedDelta.createTable(this.conn, this.jobId);

  String attributeList = getAttributesInNewSchema();
  String unionStmt = "INSERT OVERWRITE TABLE " + mergedDelta.getNameWithJobId(this.jobId)
      + " SELECT " + attributeList + " FROM " + notUpdatedWithNewSchema.getNameWithJobId(this.jobId)
      + " UNION ALL SELECT " + attributeList + " FROM " + nextDeltaWithNewSchema.getNameWithJobId(this.jobId);
  this.conn.executeStatements(unionStmt);

  nextDelta.dropTable(this.conn, this.jobId);
  return mergedDelta;
}
/**
 * Returns a new table ("not_updated") holding the rows of {@code oldTable}
 * whose keys do not appear in {@code newTable}, computed via a LEFT OUTER
 * JOIN with a null-key filter. Drops {@code oldTable} once done.
 *
 * @param oldTable source table; dropped after its surviving rows are copied
 * @param newTable table whose keys mark rows of {@code oldTable} as superseded
 * @return the freshly created table of non-superseded rows
 * @throws SQLException if any Hive statement fails
 */
private HiveManagedTable getNotUpdatedRecords(HiveTable oldTable, HiveTable newTable) throws SQLException {
  String oldName = oldTable.getNameWithJobId(this.jobId);
  String newName = newTable.getNameWithJobId(this.jobId);
  LOG.info("Getting records in table " + oldName + " but not in table " + newName);

  HiveManagedTable notUpdated = new HiveManagedTable.Builder().withName("not_updated")
      .withPrimaryKeys(oldTable.getPrimaryKeys()).withAttributes(oldTable.getAttributes()).build();
  notUpdated.createTable(this.conn, this.jobId);

  // Keep only old rows whose join key found no match in the new table.
  String leftOuterJoinStmt = "INSERT OVERWRITE TABLE " + notUpdated.getNameWithJobId(this.jobId)
      + " SELECT " + oldName + ".* FROM " + oldName
      + " LEFT OUTER JOIN " + newName
      + " ON " + getJoinCondition(oldTable, newTable)
      + " WHERE " + getKeyIsNullPredicate(newTable);
  this.conn.executeStatements(leftOuterJoinStmt);

  oldTable.dropTable(this.conn, this.jobId);
  return notUpdated;
}
/**
 * Folds all delta tables into a single "merged_delta" table.
 *
 * <p>With a single delta there is nothing to merge and it is returned as-is.
 * Otherwise the first delta seeds the merged table and each subsequent delta
 * is merged in via {@link #mergeTwoDeltas}.
 *
 * @return the table containing all delta rows merged together
 * @throws SQLException if any Hive statement fails
 */
private HiveTable mergeDeltas() throws SQLException {
  if (this.deltas.size() == 1) {
    LOG.info("Only one delta table: no need to merge delta");
    return this.deltas.get(0);
  }

  AvroExternalTable firstDelta = this.deltas.get(0);
  HiveManagedTable mergedDelta = new HiveManagedTable.Builder().withName("merged_delta")
      .withAttributes(firstDelta.getAttributes()).withPrimaryKeys(firstDelta.getPrimaryKeys()).build();
  mergedDelta.createTable(this.conn, this.jobId);

  insertFirstDeltaIntoMergedDelta(mergedDelta);
  firstDelta.dropTable(this.conn, this.jobId);

  for (int i = 1; i < this.deltas.size(); i++) {
    AvroExternalTable nextDelta = this.deltas.get(i);
    mergedDelta = mergeTwoDeltas(mergedDelta, nextDelta);
    LOG.info("Merged the first " + (i + 1) + " delta tables");
    // NOTE(review): mergeTwoDeltas also drops this delta; this second drop is
    // kept to preserve the original behavior exactly.
    nextDelta.dropTable(this.conn, this.jobId);
  }
  return mergedDelta;
}
/**
 * Builds a {@link SerialCompactor} from the job properties (output table name
 * and HDFS data location) plus the snapshot and delta tables, then runs it.
 *
 * @throws IOException if building the snapshot/delta tables fails
 */
private static void compact() throws IOException {
  String outputTableName = jobProperties.getProperty(OUTPUT + NAME);
  String outputDataLocation = jobProperties.getProperty(OUTPUT + DATALOCATION);
  SerialCompactor compactor = new SerialCompactor.Builder()
      .withSnapshot(buildSnapshotTable())
      .withDeltas(buildDeltaTables())
      .withOutputTableName(outputTableName)
      .withOutputDataLocationInHdfs(outputDataLocation)
      .build();
  compactor.compact();
}
/**
 * Applies all session-level Hive settings (queue, database, map-join,
 * input-split size, reducer count) before compaction queries are run.
 *
 * <p>NOTE(review): the calls are kept in their original order in case any
 * setting depends on an earlier one — confirm before reordering.
 *
 * @throws SQLException if executing a SET statement fails
 */
private void setHiveParameters() throws SQLException { setHiveQueueName(); setHiveDbName(); setHiveMapjoin(); setHiveInputSplitSize(); setNumberOfReducers(); }
/**
 * Reads the Avro schema file from HDFS and converts it into the table's
 * Hive attribute list. The input stream is closed automatically.
 *
 * @return attributes parsed from the Avro schema
 * @throws IOException if the schema file cannot be read or parsed
 */
private List<HiveAttribute> getAttributesFromAvroSchemaFile() throws IOException {
  try (InputStream avroSchemaStream = new HdfsReader(this.schemaLocationInHdfs).getInputStream()) {
    return parseSchema(new Schema.Parser().parse(avroSchemaStream));
  }
}
/**
 * Removes this table's temporary HDFS files — the schema file and/or the data
 * directory — but only those flagged for deletion when the job completes.
 *
 * @throws IOException if a delete operation on HDFS fails
 */
protected void deleteTmpFilesIfNeeded() throws IllegalArgumentException, IOException {
  if (this.deleteSchemaAfterDone) {
    removeHdfsPath(this.schemaLocationInHdfs);
  }
  if (this.deleteDataAfterDone) {
    removeHdfsPath(this.dataLocationInHdfs);
  }
}

/** Deletes a single path from HDFS. */
private void removeHdfsPath(String pathInHdfs) throws IOException {
  new HdfsWriter(pathInHdfs).delete();
}
/**
 * Returns the latest table's column names as a comma-separated list
 * (e.g. {@code "id, name, ts"}), in declaration order.
 */
private String getAttributesInNewSchema() {
  StringBuilder joined = new StringBuilder();
  int attributeCount = this.latestTable.getAttributes().size();
  for (int i = 0; i < attributeCount; i++) {
    if (i > 0) {
      joined.append(", ");
    }
    joined.append(this.latestTable.getAttributes().get(i).name());
  }
  return joined.toString();
}
/**
 * Seeds the merged-delta table with the full contents of the first delta.
 *
 * @param mergedDelta freshly created merged-delta table to populate
 * @throws SQLException if the INSERT statement fails
 */
private void insertFirstDeltaIntoMergedDelta(HiveManagedTable mergedDelta) throws SQLException {
  String firstDeltaName = this.deltas.get(0).getNameWithJobId(this.jobId);
  String insertStmt =
      "INSERT OVERWRITE TABLE " + mergedDelta.getNameWithJobId(this.jobId) + " SELECT * FROM " + firstDeltaName;
  this.conn.executeStatements(insertStmt);
}
/**
 * (Re)creates this external Avro table: drops any stale table with the same
 * job-scoped name, then creates it pointing at the data and schema locations
 * in HDFS (both prefixed with the HDFS URI).
 *
 * @param conn  live Hive JDBC connection
 * @param jobID job identifier used to derive the table name
 * @throws SQLException if either statement fails
 */
@Override
public void createTable(HiveJdbcConnector conn, String jobID) throws SQLException {
  String tableName = getNameWithJobId(jobID);
  String hdfsUri = HdfsIO.getHdfsUri();
  String dropStmt = String.format(DROP_TABLE_STMT, tableName);
  String createStmt = String.format(CREATE_TABLE_STMT, tableName,
      hdfsUri + this.dataLocationInHdfs, hdfsUri + this.schemaLocationInHdfs);
  conn.executeStatements(dropStmt, createStmt);
}
/**
 * Creates this table as a regular (non-temporary) Hive table by delegating
 * to the three-argument overload with the {@code "TABLE"} keyword.
 *
 * @param conn         live Hive JDBC connection
 * @param randomSuffix suffix used to derive a unique table name
 * @throws SQLException if table creation fails
 */
@Override public void createTable(HiveJdbcConnector conn, String randomSuffix) throws SQLException { createTable(conn, randomSuffix, "TABLE"); }
/** Finalizes this builder and returns the configured {@link HiveManagedTable}. */
public HiveManagedTable build() {
  HiveManagedTable configuredTable = new HiveManagedTable(this);
  return configuredTable;
}
}
/** Finalizes this builder and returns the configured {@link SerialCompactor}. */
public SerialCompactor build() {
  SerialCompactor configuredCompactor = new SerialCompactor(this);
  return configuredCompactor;
}
}
/**
 * Creates an HDFS accessor for the given path and eagerly acquires the
 * file-system handle via {@code getFileSystem()}.
 *
 * <p>NOTE(review): the constructor invokes an instance method; if
 * {@code getFileSystem()} is overridable, subclasses would see a partially
 * constructed object — confirm it is effectively final.
 *
 * @param filePathInHdfs path in HDFS this instance will read or write
 * @throws IOException if the file system cannot be obtained
 */
public HdfsIO(String filePathInHdfs) throws IOException { this.filePathInHdfs = filePathInHdfs; this.fileSystem = getFileSystem(); }
/**
 * Finalizes this builder and returns the configured {@link AvroExternalTable}.
 *
 * @throws IOException if constructing the table requires HDFS access that fails
 */
public AvroExternalTable build() throws IOException {
  AvroExternalTable configuredTable = new AvroExternalTable(this);
  return configuredTable;
}
}
/**
 * Builds the snapshot table from the job properties under the
 * {@code SNAPSHOT} prefix by delegating to {@code buildAvroExternalTable}.
 *
 * @return the configured snapshot table
 * @throws IOException if building the table fails
 */
private static AvroExternalTable buildSnapshotTable() throws IOException { return buildAvroExternalTable(SNAPSHOT); }
/**
 * Creates this table using Hive's {@code TEMPORARY TABLE} keyword by
 * delegating to the three-argument {@code createTable} overload.
 *
 * @param conn         live Hive JDBC connection
 * @param randomSuffix suffix used to derive a unique table name
 * @throws SQLException if table creation fails
 */
public void createTemporaryTable(HiveJdbcConnector conn, String randomSuffix) throws SQLException { createTable(conn, randomSuffix, "TEMPORARY TABLE"); }