@Override public List<WorkUnit> getWorkunits(SourceState state) { try { FileSystem fs = HadoopUtils.getSourceFileSystem(state); Config config = ConfigUtils.propertiesToConfig(state.getProperties()); if (state.contains(COPY_TABLE_KEY)) { HiveDataset dataset = getHiveDataset(state.getProp(COPY_TABLE_KEY), fs, state); WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(dataset, new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null); HiveTask.disableHiveWatermarker(workUnit); return Lists.newArrayList(workUnit); } else if (state.contains(MATERIALIZE_VIEW)) { HiveDataset dataset = getHiveDataset(state.getProp(MATERIALIZE_VIEW), fs, state); WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(dataset, getOutputStorageFormat(state), new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null); HiveTask.disableHiveWatermarker(workUnit); return Lists.newArrayList(workUnit); } else if (state.contains(MATERIALIZE_QUERY)) { String query = state.getProp(MATERIALIZE_QUERY); WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(query, getOutputStorageFormat(state), new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), null)); HiveTask.disableHiveWatermarker(workUnit); return Lists.newArrayList(workUnit); } } catch (IOException ioe) { throw new RuntimeException(ioe); } throw new RuntimeException(String.format("Must specify either %s, %s, or %s.", COPY_TABLE_KEY, MATERIALIZE_QUERY, MATERIALIZE_VIEW)); }
/** * Create a work unit to materialize a query to a target table using a staging table in between. * @param query the query to materialize. * @param storageFormat format in which target table should be written. * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata. */ public static WorkUnit queryResultMaterializationWorkUnit(String query, HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable) { WorkUnit workUnit = new WorkUnit(); workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.QUERY_RESULT_MATERIALIZATION.name()); workUnit.setProp(STORAGE_FORMAT_KEY, storageFormat.name()); workUnit.setProp(QUERY_RESULT_TO_MATERIALIZE_KEY, query); workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable)); TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class); HiveTask.disableHiveWatermarker(workUnit); return workUnit; }
private TaskContext getTaskContextForRun(WorkUnit workUnit) { workUnit.setProp(ConfigurationKeys.JOB_ID_KEY, "job123"); workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, "task123"); workUnit.setProp(HiveConverterUtils.HIVE_DATASET_DESTINATION_SKIP_SETGROUP, Boolean.toString(true)); HiveTask.disableHiveWatermarker(workUnit); JobState jobState = new JobState("job", "job123"); return new TaskContext(new WorkUnitState(workUnit, jobState)); }
@Override public List<WorkUnit> getWorkunits(SourceState state) { try { FileSystem fs = HadoopUtils.getSourceFileSystem(state); Config config = ConfigUtils.propertiesToConfig(state.getProperties()); if (state.contains(COPY_TABLE_KEY)) { HiveDataset dataset = getHiveDataset(state.getProp(COPY_TABLE_KEY), fs, state); WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(dataset, new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null); HiveTask.disableHiveWatermarker(workUnit); return Lists.newArrayList(workUnit); } else if (state.contains(MATERIALIZE_VIEW)) { HiveDataset dataset = getHiveDataset(state.getProp(MATERIALIZE_VIEW), fs, state); WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(dataset, getOutputStorageFormat(state), new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null); HiveTask.disableHiveWatermarker(workUnit); return Lists.newArrayList(workUnit); } else if (state.contains(MATERIALIZE_QUERY)) { String query = state.getProp(MATERIALIZE_QUERY); WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(query, getOutputStorageFormat(state), new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), null)); HiveTask.disableHiveWatermarker(workUnit); return Lists.newArrayList(workUnit); } } catch (IOException ioe) { throw new RuntimeException(ioe); } throw new RuntimeException(String.format("Must specify either %s, %s, or %s.", COPY_TABLE_KEY, MATERIALIZE_QUERY, MATERIALIZE_VIEW)); }
/** * Create a work unit to materialize a query to a target table using a staging table in between. * @param query the query to materialize. * @param storageFormat format in which target table should be written. * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata. */ public static WorkUnit queryResultMaterializationWorkUnit(String query, HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable) { WorkUnit workUnit = new WorkUnit(); workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.QUERY_RESULT_MATERIALIZATION.name()); workUnit.setProp(STORAGE_FORMAT_KEY, storageFormat.name()); workUnit.setProp(QUERY_RESULT_TO_MATERIALIZE_KEY, query); workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable)); TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class); HiveTask.disableHiveWatermarker(workUnit); return workUnit; }