/**
 * Create a work unit to materialize a table / view to a target table using a staging table in between.
 *
 * @param dataset {@link HiveDataset} for the source table.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 * @param partitionName optional partition to materialize; skipped when null or empty.
 * @return a {@link HiveWorkUnit} wired to be executed by {@link HiveMaterializerTaskFactory}.
 */
public static HiveWorkUnit viewMaterializationWorkUnit(HiveDataset dataset,
    HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable,
    @Nullable String partitionName) {
  HiveWorkUnit wu = new HiveWorkUnit(dataset);
  wu.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_MATERIALIZATION.name());
  wu.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  // The staging/target table metadata travels on the work unit as JSON.
  wu.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  // Only record a partition when the caller actually supplied one.
  if (!Strings.isNullOrEmpty(partitionName)) {
    wu.setPartitionName(partitionName);
  }
  TaskUtils.setTaskFactoryClass(wu, HiveMaterializerTaskFactory.class);
  return wu;
}
/**
 * Generates a CTAS statement to dump the results of a query into a new table.
 *
 * @param outputDbAndTable output db and table where contents should be written.
 * @param sourceQuery query to materialize.
 * @param storageFormat format of the output table.
 * @param outputTableLocation location where files of the output table should be written.
 * @return the CTAS DDL string.
 * @throws IllegalArgumentException if the output db or table name is null or empty.
 */
public static String generateStagingCTASStatement(HiveDatasetFinder.DbAndTable outputDbAndTable,
    String sourceQuery, StorageFormat storageFormat, String outputTableLocation) {
  String db = outputDbAndTable.getDb();
  String table = outputDbAndTable.getTable();
  Preconditions.checkArgument(!Strings.isNullOrEmpty(db) && !Strings.isNullOrEmpty(table),
      "Invalid output db and table " + outputDbAndTable);
  // TEMPORARY keeps the staging table session-scoped; db/table are backtick-quoted for Hive.
  return String.format("CREATE TEMPORARY TABLE `%s`.`%s` STORED AS %s LOCATION '%s' AS %s",
      db, table, storageFormat.getHiveName(), outputTableLocation, sourceQuery);
}
/**
 * Create a work unit to materialize a table / view to a target table using a staging table in between.
 *
 * @param dataset {@link HiveDataset} for the source table.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 * @param partitionName optional partition to materialize; skipped when null or empty.
 * @return a {@link HiveWorkUnit} wired to be executed by {@link HiveMaterializerTaskFactory}.
 */
public static HiveWorkUnit viewMaterializationWorkUnit(HiveDataset dataset,
    HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable,
    @Nullable String partitionName) {
  HiveWorkUnit result = new HiveWorkUnit(dataset);
  result.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_MATERIALIZATION.name());
  result.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  // Serialize the staging/target table description onto the work unit.
  result.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  if (!Strings.isNullOrEmpty(partitionName)) {
    // A partition was requested — record it so the task materializes just that partition.
    result.setPartitionName(partitionName);
  }
  TaskUtils.setTaskFactoryClass(result, HiveMaterializerTaskFactory.class);
  return result;
}
/**
 * Create a work unit to materialize a query to a target table using a staging table in between.
 *
 * @param query the query to materialize.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 * @return a {@link WorkUnit} wired to be executed by {@link HiveMaterializerTaskFactory}.
 */
public static WorkUnit queryResultMaterializationWorkUnit(String query,
    HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable) {
  WorkUnit wu = new WorkUnit();
  wu.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.QUERY_RESULT_MATERIALIZATION.name());
  wu.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  wu.setProp(QUERY_RESULT_TO_MATERIALIZE_KEY, query);
  // The staging/target table metadata travels on the work unit as JSON.
  wu.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  TaskUtils.setTaskFactoryClass(wu, HiveMaterializerTaskFactory.class);
  // Query materialization has no source table to watermark against.
  HiveTask.disableHiveWatermarker(wu);
  return wu;
}
/**
 * Generates a CTAS statement to dump the results of a query into a new table.
 *
 * @param outputDbAndTable output db and table where contents should be written.
 * @param sourceQuery query to materialize.
 * @param storageFormat format of the output table.
 * @param outputTableLocation location where files of the output table should be written.
 * @return the CTAS DDL string.
 * @throws IllegalArgumentException if the output db or table name is null or empty.
 */
public static String generateStagingCTASStatement(HiveDatasetFinder.DbAndTable outputDbAndTable,
    String sourceQuery, StorageFormat storageFormat, String outputTableLocation) {
  boolean namesPresent = !Strings.isNullOrEmpty(outputDbAndTable.getDb())
      && !Strings.isNullOrEmpty(outputDbAndTable.getTable());
  Preconditions.checkArgument(namesPresent, "Invalid output db and table " + outputDbAndTable);
  // Session-scoped staging table; Hive identifiers are backtick-quoted.
  return String.format("CREATE TEMPORARY TABLE `%s`.`%s` STORED AS %s LOCATION '%s' AS %s",
      outputDbAndTable.getDb(), outputDbAndTable.getTable(), storageFormat.getHiveName(),
      outputTableLocation, sourceQuery);
}
public QueryBasedMaterializerQueryGenerator(WorkUnitState workUnitState) throws IOException { super(workUnitState); this.sourceQuery = workUnitState.getProp(HiveMaterializer.QUERY_RESULT_TO_MATERIALIZE_KEY); this.storageFormat = HiveConverterUtils.StorageFormat.valueOf(workUnitState.getProp(HiveMaterializer.STORAGE_FORMAT_KEY)); }
public MaterializeTableQueryGenerator(WorkUnitState workUnitState) throws IOException { super(workUnitState, false); this.storageFormat = HiveConverterUtils.StorageFormat.valueOf(workUnitState.getProp(HiveMaterializer.STORAGE_FORMAT_KEY)); }
/**
 * Create a work unit to materialize a query to a target table using a staging table in between.
 *
 * @param query the query to materialize.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 * @return a {@link WorkUnit} wired to be executed by {@link HiveMaterializerTaskFactory}.
 */
public static WorkUnit queryResultMaterializationWorkUnit(String query,
    HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable) {
  WorkUnit result = new WorkUnit();
  result.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.QUERY_RESULT_MATERIALIZATION.name());
  result.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  result.setProp(QUERY_RESULT_TO_MATERIALIZE_KEY, query);
  // Destination metadata is carried on the work unit in serialized (JSON) form.
  result.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  TaskUtils.setTaskFactoryClass(result, HiveMaterializerTaskFactory.class);
  // No source table exists for a raw query, so watermarking is turned off.
  HiveTask.disableHiveWatermarker(result);
  return result;
}
/**
 * Constructs the generator for materializing a table/view, resolving the target storage format
 * from the work unit state.
 *
 * @param workUnitState state carrying the materializer properties set at work-unit creation time.
 * @throws IOException if the superclass initialization fails.
 */
public MaterializeTableQueryGenerator(WorkUnitState workUnitState) throws IOException {
  super(workUnitState, false);
  this.storageFormat =
      HiveConverterUtils.StorageFormat.valueOf(workUnitState.getProp(HiveMaterializer.STORAGE_FORMAT_KEY));
}
/**
 * Constructs the generator for query-result materialization, resolving the source query and
 * target storage format from the work unit state.
 *
 * @param workUnitState state carrying the materializer properties set at work-unit creation time.
 * @throws IOException if the superclass initialization fails.
 */
public QueryBasedMaterializerQueryGenerator(WorkUnitState workUnitState) throws IOException {
  super(workUnitState);
  this.sourceQuery = workUnitState.getProp(HiveMaterializer.QUERY_RESULT_TO_MATERIALIZE_KEY);
  this.storageFormat =
      HiveConverterUtils.StorageFormat.valueOf(workUnitState.getProp(HiveMaterializer.STORAGE_FORMAT_KEY));
}