@Override
protected FileSystem getSourceFileSystem(State state) throws IOException {
  // Track the FileSystem with the Closer so it is released when the enclosing source is closed.
  return this.closer.register(HadoopUtils.getSourceFileSystem(state));
}
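// For reference, a minimal sketch (not from the Gobblin source) of the Guava Closer
// pattern used above: every resource passed through register() is released together
// when closer.close() is called, typically from a shutdown hook.
import com.google.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;

public class CloserSketch implements Closeable {
  private final Closer closer = Closer.create();

  // register() returns its argument, so tracked resources can be used inline.
  public <C extends Closeable> C track(C resource) {
    return this.closer.register(resource);
  }

  @Override
  public void close() throws IOException {
    // Closes all registered resources, most recently registered first.
    this.closer.close();
  }
}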
/**
 * Can be overridden to specify a non-pluggable {@link org.apache.gobblin.dataset.DatasetsFinder}.
 * @throws IOException
 */
protected IterableDatasetFinder createDatasetsFinder(SourceState state) throws IOException {
  return DatasetUtils.instantiateIterableDatasetFinder(state.getProperties(),
      HadoopUtils.getSourceFileSystem(state), null);
}
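// A hedged sketch of the override described in the Javadoc above. FixedDatasetFinder is
// hypothetical; any implementation of org.apache.gobblin.dataset.IterableDatasetFinder
// would do. This bypasses DatasetUtils' property-driven (pluggable) finder lookup.
@Override
protected IterableDatasetFinder createDatasetsFinder(SourceState state) throws IOException {
  return new FixedDatasetFinder(HadoopUtils.getSourceFileSystem(state), state.getProperties());
}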
/**
 * @param state a {@link org.apache.gobblin.configuration.WorkUnitState} carrying properties needed by the returned
 *        {@link Extractor}
 * @return a {@link FileAwareInputStreamExtractor}.
 * @throws IOException
 */
@Override
public Extractor<String, FileAwareInputStream> getExtractor(WorkUnitState state) throws IOException {
  Class<?> copyEntityClass = getCopyEntityClass(state);

  if (CopyableFile.class.isAssignableFrom(copyEntityClass)) {
    CopyableFile copyEntity = (CopyableFile) deserializeCopyEntity(state);
    return extractorForCopyableFile(HadoopUtils.getSourceFileSystem(state), copyEntity, state);
  }
  // Only CopyableFile entities carry file data; other copy entities get an empty extractor.
  return new EmptyExtractor<>("empty");
}
    Lists.newArrayList(MAX_FILES_COPIED_KEY));

final FileSystem sourceFs = HadoopUtils.getSourceFileSystem(state);
final FileSystem targetFs = HadoopUtils.getWriterFileSystem(state, 1, 0);
state.setProp(SlaEventKeys.SOURCE_URI, sourceFs.getUri());
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  try {
    FileSystem fs = HadoopUtils.getSourceFileSystem(state);
    Config config = ConfigUtils.propertiesToConfig(state.getProperties());

    if (state.contains(COPY_TABLE_KEY)) {
      HiveDataset dataset = getHiveDataset(state.getProp(COPY_TABLE_KEY), fs, state);
      WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(dataset,
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_VIEW)) {
      HiveDataset dataset = getHiveDataset(state.getProp(MATERIALIZE_VIEW), fs, state);
      WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(dataset, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_QUERY)) {
      String query = state.getProp(MATERIALIZE_QUERY);
      WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(query, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), null));
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    }
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
  throw new RuntimeException(
      String.format("Must specify one of %s, %s, or %s.", COPY_TABLE_KEY, MATERIALIZE_QUERY, MATERIALIZE_VIEW));
}
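// Hedged usage sketch: driving the source in table-copy mode. The property key below is
// a hypothetical stand-in for the string value of COPY_TABLE_KEY, which this snippet
// does not show; the job is expected to set one of the three mode keys.
SourceState state = new SourceState();
state.setProp("hive.materializer.copy.table", "mydb.my_table"); // hypothetical key
List<WorkUnit> workUnits = new HiveMaterializerSource().getWorkunits(state);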