TableType tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); List<WorkUnitState> previousWorkunits = Lists.newArrayList(state.getPreviousWorkUnitStates()); Set<String> prevFsSnapshot = Sets.newHashSet();
if (state.getPropAsBoolean(KafkaSource.GOBBLIN_KAFKA_EXTRACT_ALLOW_TABLE_TYPE_NAMESPACE_CUSTOMIZATION)) { String tableTypeStr = state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, KafkaSource.DEFAULT_TABLE_TYPE.toString()); tableType = Extract.TableType.valueOf(tableTypeStr); extractNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, KafkaSource.DEFAULT_NAMESPACE_NAME);
table.setName(extract.getTable()); if (extract.hasType()) { table.setType(TableTypeEnum.valueOf(extract.getType().name()));
TableType tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); List<WorkUnitState> previousWorkunits = Lists.newArrayList(state.getPreviousWorkUnitStates()); Set<String> prevFsSnapshot = Sets.newHashSet();
if (state.getPropAsBoolean(KafkaSource.GOBBLIN_KAFKA_EXTRACT_ALLOW_TABLE_TYPE_NAMESPACE_CUSTOMIZATION)) { String tableTypeStr = state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, KafkaSource.DEFAULT_TABLE_TYPE.toString()); tableType = Extract.TableType.valueOf(tableTypeStr); extractNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, KafkaSource.DEFAULT_NAMESPACE_NAME);
table.setName(extract.getTable()); if (extract.hasType()) { table.setType(TableTypeEnum.valueOf(extract.getType().name()));
State topicState = topicSpecificState.get(); if (topicState.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)) { currentTableType = Extract.TableType.valueOf(topicState.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY));
? Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()) : null; String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);
Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()) : null; String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);
String extractId = DTF.print(new DateTime()); super.addAll(state); super.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, type.toString()); super.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, namespace); super.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, table);
protected List<WorkUnit> generateWorkUnits(SourceEntity sourceEntity, SourceState state, long previousWatermark) { List<WorkUnit> workUnits = Lists.newArrayList(); String nameSpaceName = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); TableType tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); List<Partition> partitions = new Partitioner(state).getPartitionList(previousWatermark); Collections.sort(partitions, Partitioner.ascendingComparator); // {@link ConfigurationKeys.EXTRACT_TABLE_NAME_KEY} specify the output path for Extract String outputTableName = sourceEntity.getDestTableName(); log.info("Create extract output with table name is " + outputTableName); Extract extract = createExtract(tableType, nameSpaceName, outputTableName); // Setting current time for the full extract if (Boolean.valueOf(state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY))) { extract.setFullTrue(System.currentTimeMillis()); } for (Partition partition : partitions) { WorkUnit workunit = WorkUnit.create(extract); workunit.setProp(ConfigurationKeys.SOURCE_ENTITY, sourceEntity.getSourceEntityName()); workunit.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, sourceEntity.getDestTableName()); workunit.setProp(WORK_UNIT_STATE_VERSION_KEY, CURRENT_WORK_UNIT_STATE_VERSION); addLineageSourceInfo(state, sourceEntity, workunit); partition.serialize(workunit); workUnits.add(workunit); } return workUnits; }
/** * Gobblin calls the {@link Source#getWorkunits(SourceState)} method after creating a {@link Source} object with a * blank constructor, so any custom initialization of the object needs to be done here. */ protected void init(SourceState state) { retriever.init(state); try { initFileSystemHelper(state); } catch (FileBasedHelperException e) { Throwables.propagate(e); } AvroFsHelper fsHelper = (AvroFsHelper) this.fsHelper; this.fs = fsHelper.getFileSystem(); this.sourceState = state; this.lowWaterMark = getLowWaterMark(state.getPreviousWorkUnitStates(), state.getProp(DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE, String.valueOf(DEFAULT_DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE))); this.maxFilesPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB, DEFAULT_DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB); this.maxWorkUnitsPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB, DEFAULT_DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB); this.tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); this.fileCount = 0; this.sourceDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY)); }
/** * Get the {@link TableType} of the table. * * @return {@link TableType} of the table */ public TableType getType() { return TableType.valueOf(getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)); }
State topicState = topicSpecificState.get(); if (topicState.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)) { currentTableType = Extract.TableType.valueOf(topicState.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY));
String extractId = DTF.print(new DateTime()); super.addAll(state); super.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, type.toString()); super.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, namespace); super.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, table);
Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()) : null; String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);
? Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()) : null; String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);
protected List<WorkUnit> generateWorkUnits(SourceEntity sourceEntity, SourceState state, long previousWatermark) { List<WorkUnit> workUnits = Lists.newArrayList(); String nameSpaceName = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY); TableType tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); List<Partition> partitions = new Partitioner(state).getPartitionList(previousWatermark); Collections.sort(partitions, Partitioner.ascendingComparator); // {@link ConfigurationKeys.EXTRACT_TABLE_NAME_KEY} specify the output path for Extract String outputTableName = sourceEntity.getDestTableName(); log.info("Create extract output with table name is " + outputTableName); Extract extract = createExtract(tableType, nameSpaceName, outputTableName); // Setting current time for the full extract if (Boolean.valueOf(state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY))) { extract.setFullTrue(System.currentTimeMillis()); } for (Partition partition : partitions) { WorkUnit workunit = WorkUnit.create(extract); workunit.setProp(ConfigurationKeys.SOURCE_ENTITY, sourceEntity.getSourceEntityName()); workunit.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, sourceEntity.getDestTableName()); workunit.setProp(WORK_UNIT_STATE_VERSION_KEY, CURRENT_WORK_UNIT_STATE_VERSION); addLineageSourceInfo(state, sourceEntity, workunit); partition.serialize(workunit); workUnits.add(workunit); } return workUnits; }
/** * Gobblin calls the {@link Source#getWorkunits(SourceState)} method after creating a {@link Source} object with a * blank constructor, so any custom initialization of the object needs to be done here. */ protected void init(SourceState state) { retriever.init(state); try { initFileSystemHelper(state); } catch (FileBasedHelperException e) { Throwables.propagate(e); } AvroFsHelper fsHelper = (AvroFsHelper) this.fsHelper; this.fs = fsHelper.getFileSystem(); this.sourceState = state; this.lowWaterMark = getLowWaterMark(state.getPreviousWorkUnitStates(), state.getProp(DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE, String.valueOf(DEFAULT_DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE))); this.maxFilesPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB, DEFAULT_DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB); this.maxWorkUnitsPerJob = state.getPropAsInt(DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB, DEFAULT_DATE_PARTITIONED_SOURCE_MAX_WORKUNITS_PER_JOB); this.tableType = TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase()); this.fileCount = 0; this.sourceDir = new Path(state.getProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY)); }
@Test public void testFailJobWhenPreviousStateExistsButDoesNotHaveSnapshot() { try { DummyFileBasedSource source = new DummyFileBasedSource(); WorkUnitState workUnitState = new WorkUnitState(); workUnitState.setId("priorState"); List<WorkUnitState> workUnitStates = Lists.newArrayList(workUnitState); State state = new State(); state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, Extract.TableType.SNAPSHOT_ONLY.toString()); state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_PRIOR_SNAPSHOT_REQUIRED, true); SourceState sourceState = new SourceState(state, workUnitStates); source.getWorkunits(sourceState); Assert.fail("Expected RuntimeException, but no exceptions were thrown."); } catch (RuntimeException e) { Assert.assertEquals("No 'source.filebased.fs.snapshot' found on state of prior job", e.getMessage()); } }