/**
 * Applies one SerDe-level property to this object.
 *
 * <p>If {@code key} equals {@code HiveConstants.SERDE_TYPE}, {@code value} is cast to {@link String}, stored in
 * {@code serDeType}, and {@code key} is removed from {@code state}. For any other key nothing is stored and
 * {@code state} is left untouched.</p>
 *
 * @param state the state from which a recognized key is removed
 * @param key the property name to dispatch on
 * @param value the property value; cast to {@link String} when the key is recognized
 */
protected void updateSerDeFields(State state, String key, Object value) {
  switch (key) {
    case HiveConstants.SERDE_TYPE:
      this.serDeType = Optional.of((String) value);
      break;
    default:
      // Not a field handled here: leave the property in the state.
      return;
  }
  state.removeProp(key);
}
/**
 * Removes {@code key} through the superclass implementation first, then from {@code workUnit}, then from
 * {@code jobState}.
 *
 * @param key name of the property to remove
 */
@Override
public void removeProp(String key) {
  super.removeProp(key);
  workUnit.removeProp(key);
  jobState.removeProp(key);
}
/**
 * Applies one table/partition-level property to this object.
 *
 * <p>Recognized keys are {@code HiveConstants.CREATE_TIME} (stored in {@code createTime}) and
 * {@code HiveConstants.LAST_ACCESS_TIME} (stored in {@code lastAccessTime}); in both cases {@code value} is cast
 * to {@link Long}. A recognized key is removed from {@code state} after its value has been stored. Any other key
 * is ignored and {@code state} is left untouched.</p>
 *
 * @param state the state from which a recognized key is removed
 * @param key the property name to dispatch on
 * @param value the property value; cast to {@link Long} when the key is recognized
 */
protected void updateTablePartitionFields(State state, String key, Object value) {
  switch (key) {
    case HiveConstants.CREATE_TIME:
      this.createTime = Optional.of((Long) value);
      break;
    case HiveConstants.LAST_ACCESS_TIME:
      // The last-access time has its own field; writing it into createTime would clobber the creation time.
      this.lastAccessTime = Optional.of((Long) value);
      break;
    default:
      // Not a field handled here: leave the property in the state.
      return;
  }
  state.removeProp(key);
}
/**
 * Applies one table/partition-level property, handling the keys specific to this class after delegating to the
 * superclass.
 *
 * <p>The superclass implementation is always invoked first. Afterwards, {@code HiveConstants.OWNER} and
 * {@code HiveConstants.TABLE_TYPE} values are cast to {@link String} and stored in {@code owner} and
 * {@code tableType}; a {@code HiveConstants.RETENTION} value is cast to {@link Long} and stored in
 * {@code retention}. A key recognized here is then removed from {@code state}; other keys are not removed by
 * this method.</p>
 *
 * @param state the state from which a recognized key is removed
 * @param key the property name to dispatch on
 * @param value the property value; cast according to the recognized key
 */
@Override
protected void updateTablePartitionFields(State state, String key, Object value) {
  super.updateTablePartitionFields(state, key, value);

  switch (key) {
    case HiveConstants.OWNER:
      this.owner = Optional.of((String) value);
      break;
    case HiveConstants.TABLE_TYPE:
      this.tableType = Optional.of((String) value);
      break;
    case HiveConstants.RETENTION:
      this.retention = Optional.of((Long) value);
      break;
    default:
      // Not one of this class's own fields: nothing more to do.
      return;
  }
  state.removeProp(key);
}
// Remove the property named by key from state.
state.removeProp(key);
/**
 * Reads the partition info of a work unit from {@code state} and removes it from {@code state} afterwards, so
 * that this information is not persisted needlessly.
 *
 * <p>
 * In Gobblin, only the {@link PartitionedDataWriter} knows all partitions written for a work unit. Each partition
 * {@link DataWriter} decides the actual form of a dataset partition
 * </p>
 *
 * <p>When the value stored under the partitions key for {@code branchId} is null or empty, a new empty list is
 * returned and {@code state} is not modified.</p>
 *
 * @param state the state holding the serialized partition list
 * @param branchId the branch whose partitions key is looked up
 * @return the partition descriptors parsed from the stored JSON, or a new empty list if none is stored
 */
public static List<PartitionDescriptor> getPartitionInfoAndClean(State state, int branchId) {
  final String key = getPartitionsKey(branchId);
  final String serialized = state.getProp(key);

  if (!Strings.isNullOrEmpty(serialized)) {
    // Drop the entry before parsing, exactly once the value has been captured.
    state.removeProp(key);
    return PartitionDescriptor.fromPartitionJsonList(serialized);
  }
  return Lists.newArrayList();
}
}
/**
 * Runs 1000 passes; in each pass, for every number 0..999, removes the property named by that number from
 * {@code state} and sets it again to the same decimal string. Any {@link Throwable} raised by a single
 * remove/set pair is added to {@code exceptions} and the loop continues.
 */
@Override
public void run() {
  for (int pass = 0; pass < 1000; pass++) {
    for (int n = 0; n < 1000; n++) {
      try {
        final String prop = Integer.toString(n);
        state.removeProp(prop);
        state.setProp(prop, prop);
      } catch (Throwable t) {
        exceptions.add(t);
      }
    }
  }
}
});
/**
 * Extracts the property {@code key} from {@code state} as a value of the type described by {@code token}, and
 * removes the property from {@code state}.
 *
 * <p>The accessor is chosen by testing, in order, whether {@code Boolean}, {@code Integer}, {@code Long} or
 * {@code List<String>} is assignable from {@code token}: {@code getPropAsBoolean}, {@code getPropAsInt},
 * {@code getPropAsLong} and {@code getPropAsList} are used respectively, and {@code getProp} otherwise.</p>
 *
 * @param state the state to read from and remove the property from
 * @param key the property name
 * @param token describes the requested value type
 * @param <T> the requested value type
 * @return the extracted value, or {@link Optional#absent()} when {@code state} does not contain {@code key}
 *         (in which case {@code state} is not modified)
 */
@SuppressWarnings({ "serial", "unchecked" })
protected static <T> Optional<T> populateField(State state, String key, TypeToken<T> token) {
  if (!state.contains(key)) {
    return Optional.<T> absent();
  }

  // The value is wrapped before the property is removed, so a failing read leaves the state untouched.
  final Optional<?> extracted;
  if (new TypeToken<Boolean>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsBoolean(key));
  } else if (new TypeToken<Integer>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsInt(key));
  } else if (new TypeToken<Long>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsLong(key));
  } else if (new TypeToken<List<String>>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsList(key));
  } else {
    extracted = Optional.of(state.getProp(key));
  }

  state.removeProp(key);
  return (Optional<T>) extracted;
}
/**
 * Converts the deprecated keys {@value #EXTRACT_LIMIT_RATE_LIMIT_KEY_DEP},
 * {@value #EXTRACT_LIMIT_RATE_LIMIT_TIMEUNIT_KEY_DEP}, {@value #EXTRACT_LIMIT_TIME_LIMIT_KEY_DEP} and
 * {@value #EXTRACT_LIMIT_TIME_LIMIT_TIMEUNIT_KEY_DEP}, since they are not TypeSafe compatible. Each deprecated key
 * present in {@code state} is removed and its value is stored under {@value #EXTRACT_LIMIT_RATE_LIMIT_KEY},
 * {@value #EXTRACT_LIMIT_RATE_LIMIT_TIMEUNIT_KEY}, {@value #EXTRACT_LIMIT_TIME_LIMIT_KEY} and
 * {@value #EXTRACT_LIMIT_TIME_LIMIT_TIMEUNIT_KEY}, respectively.
 *
 * @param state the state to convert in place
 * @return the same {@code state} instance that was passed in
 */
private static State convertDeprecatedConfigs(State state) {
  moveDeprecatedProp(state, EXTRACT_LIMIT_RATE_LIMIT_KEY_DEP, EXTRACT_LIMIT_RATE_LIMIT_KEY);
  moveDeprecatedProp(state, EXTRACT_LIMIT_RATE_LIMIT_TIMEUNIT_KEY_DEP, EXTRACT_LIMIT_RATE_LIMIT_TIMEUNIT_KEY);
  moveDeprecatedProp(state, EXTRACT_LIMIT_TIME_LIMIT_KEY_DEP, EXTRACT_LIMIT_TIME_LIMIT_KEY);
  moveDeprecatedProp(state, EXTRACT_LIMIT_TIME_LIMIT_TIMEUNIT_KEY_DEP, EXTRACT_LIMIT_TIME_LIMIT_TIMEUNIT_KEY);
  return state;
}

/**
 * If {@code state} contains {@code deprecatedKey}, copies its value to {@code currentKey} and then removes
 * {@code deprecatedKey}; otherwise does nothing.
 */
private static void moveDeprecatedProp(State state, String deprecatedKey, String currentKey) {
  if (!state.contains(deprecatedKey)) {
    return;
  }
  state.setProp(currentKey, state.getProp(deprecatedKey));
  state.removeProp(deprecatedKey);
}
}
// Serialize a default-constructed GlobalMetadata to JSON, remove DATA_PUBLISHER_METADATA_OUTPUT_DIR from s,
// set DATA_PUBLISH_WRITER_METADATA_KEY to "true", and store the JSON under WRITER_METADATA_KEY.
String md = new GlobalMetadata().toJson(); s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR); s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true"); s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
// Serialize a default-constructed GlobalMetadata to JSON, remove DATA_PUBLISHER_METADATA_OUTPUT_DIR from s,
// set DATA_PUBLISH_WRITER_METADATA_KEY to "true", and store the JSON under WRITER_METADATA_KEY.
String md = new GlobalMetadata().toJson(); s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR); s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true"); s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
/**
 * Checks that {@code BaseDataPublisher.publishMetadata} leaves the publish directory empty when both
 * {@code DATA_PUBLISHER_METADATA_OUTPUT_DIR} and {@code DATA_PUBLISHER_METADATA_OUTPUT_FILE} have been removed
 * from the publisher state, even though the work unit carries writer metadata, partition paths and writer
 * metrics.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testNoOutputWhenDisabledWithPartitions() throws IOException {
  File publishPath = Files.createTempDir();

  // Publisher state with both metadata output locations removed.
  State s = buildDefaultState(1);
  s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
  s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE);
  s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());

  WorkUnitState wuState = new WorkUnitState();
  addStateToWorkunit(s, wuState);
  wuState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

  FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", "1-2-3-4", 0, 10);
  FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8", 10, 20);

  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson());
  // NOTE(review): the metrics keys below use the suffixes " _0" / " _1" (with a leading space) while the
  // partition-path keys use "_0" / "_1"; this looks like a typo but is kept as-is -- confirm intended keys.
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_0", "1-2-3-4");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _0", metrics2.toJson());
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_1", "5-6-7-8");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _1", metrics2.toJson());

  BaseDataPublisher publisher = new BaseDataPublisher(s);
  publisher.publishMetadata(Collections.singletonList(wuState));

  String[] filesInPublishDir = publishPath.list();
  // TestNG's assertEquals takes (actual, expected, message); passing them in that order keeps the failure
  // report accurate.
  Assert.assertEquals(filesInPublishDir.length, 0, "Expected 0 files to be output to publish path");
}
// Serialize a default-constructed GlobalMetadata to JSON, remove DATA_PUBLISHER_METADATA_OUTPUT_DIR from s,
// and set DATA_PUBLISH_WRITER_METADATA_KEY + ".0" and + ".1" to "true".
// NOTE(review): the ".0" / ".1" suffixes presumably address individual branches -- confirm against the caller.
String md = new GlobalMetadata().toJson(); s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR); s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY + ".0", "true"); s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY + ".1", "true");
// Serialize a default-constructed GlobalMetadata to JSON, remove DATA_PUBLISHER_METADATA_OUTPUT_DIR from s,
// set DATA_PUBLISH_WRITER_METADATA_KEY to "true", and store the JSON under WRITER_METADATA_KEY.
String md = new GlobalMetadata().toJson(); s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR); s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true"); s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
// Serialize a default-constructed GlobalMetadata to JSON, remove DATA_PUBLISHER_METADATA_OUTPUT_DIR from s,
// set DATA_PUBLISH_WRITER_METADATA_KEY to "true", and store the JSON under WRITER_METADATA_KEY.
String md = new GlobalMetadata().toJson(); s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR); s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true"); s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
/**
 * Applies one SerDe-level property to this object.
 *
 * <p>If {@code key} equals {@code HiveConstants.SERDE_TYPE}, {@code value} is cast to {@link String}, stored in
 * {@code serDeType}, and {@code key} is removed from {@code state}. For any other key nothing is stored and
 * {@code state} is left untouched.</p>
 *
 * @param state the state from which a recognized key is removed
 * @param key the property name to dispatch on
 * @param value the property value; cast to {@link String} when the key is recognized
 */
protected void updateSerDeFields(State state, String key, Object value) {
  switch (key) {
    case HiveConstants.SERDE_TYPE:
      this.serDeType = Optional.of((String) value);
      break;
    default:
      // Not a field handled here: leave the property in the state.
      return;
  }
  state.removeProp(key);
}
/**
 * Removes {@code key} through the superclass implementation first, then from {@code workUnit}, then from
 * {@code jobState}.
 *
 * @param key name of the property to remove
 */
@Override
public void removeProp(String key) {
  super.removeProp(key);
  workUnit.removeProp(key);
  jobState.removeProp(key);
}
/**
 * Applies one table/partition-level property to this object.
 *
 * <p>Recognized keys are {@code HiveConstants.CREATE_TIME} (stored in {@code createTime}) and
 * {@code HiveConstants.LAST_ACCESS_TIME} (stored in {@code lastAccessTime}); in both cases {@code value} is cast
 * to {@link Long}. A recognized key is removed from {@code state} after its value has been stored. Any other key
 * is ignored and {@code state} is left untouched.</p>
 *
 * @param state the state from which a recognized key is removed
 * @param key the property name to dispatch on
 * @param value the property value; cast to {@link Long} when the key is recognized
 */
protected void updateTablePartitionFields(State state, String key, Object value) {
  switch (key) {
    case HiveConstants.CREATE_TIME:
      this.createTime = Optional.of((Long) value);
      break;
    case HiveConstants.LAST_ACCESS_TIME:
      // The last-access time has its own field; writing it into createTime would clobber the creation time.
      this.lastAccessTime = Optional.of((Long) value);
      break;
    default:
      // Not a field handled here: leave the property in the state.
      return;
  }
  state.removeProp(key);
}
/**
 * Reads the partition info of a work unit from {@code state} and removes it from {@code state} afterwards, so
 * that this information is not persisted needlessly.
 *
 * <p>
 * In Gobblin, only the {@link PartitionedDataWriter} knows all partitions written for a work unit. Each partition
 * {@link DataWriter} decides the actual form of a dataset partition
 * </p>
 *
 * <p>When the value stored under the partitions key for {@code branchId} is null or empty, a new empty list is
 * returned and {@code state} is not modified.</p>
 *
 * @param state the state holding the serialized partition list
 * @param branchId the branch whose partitions key is looked up
 * @return the partition descriptors parsed from the stored JSON, or a new empty list if none is stored
 */
public static List<PartitionDescriptor> getPartitionInfoAndClean(State state, int branchId) {
  final String key = getPartitionsKey(branchId);
  final String serialized = state.getProp(key);

  if (!Strings.isNullOrEmpty(serialized)) {
    // Drop the entry before parsing, exactly once the value has been captured.
    state.removeProp(key);
    return PartitionDescriptor.fromPartitionJsonList(serialized);
  }
  return Lists.newArrayList();
}
}
/**
 * Extracts the property {@code key} from {@code state} as a value of the type described by {@code token}, and
 * removes the property from {@code state}.
 *
 * <p>The accessor is chosen by testing, in order, whether {@code Boolean}, {@code Integer}, {@code Long} or
 * {@code List<String>} is assignable from {@code token}: {@code getPropAsBoolean}, {@code getPropAsInt},
 * {@code getPropAsLong} and {@code getPropAsList} are used respectively, and {@code getProp} otherwise.</p>
 *
 * @param state the state to read from and remove the property from
 * @param key the property name
 * @param token describes the requested value type
 * @param <T> the requested value type
 * @return the extracted value, or {@link Optional#absent()} when {@code state} does not contain {@code key}
 *         (in which case {@code state} is not modified)
 */
@SuppressWarnings({ "serial", "unchecked" })
protected static <T> Optional<T> populateField(State state, String key, TypeToken<T> token) {
  if (!state.contains(key)) {
    return Optional.<T> absent();
  }

  // The value is wrapped before the property is removed, so a failing read leaves the state untouched.
  final Optional<?> extracted;
  if (new TypeToken<Boolean>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsBoolean(key));
  } else if (new TypeToken<Integer>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsInt(key));
  } else if (new TypeToken<Long>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsLong(key));
  } else if (new TypeToken<List<String>>() {}.isAssignableFrom(token)) {
    extracted = Optional.of(state.getPropAsList(key));
  } else {
    extracted = Optional.of(state.getProp(key));
  }

  state.removeProp(key);
  return (Optional<T>) extracted;
}