/**
 * Record which {@link TaskFactory} implementation should be used to run this task.
 *
 * @param state the {@link State} to write the factory class name into
 * @param klazz the {@link TaskFactory} implementation class
 */
public static void setTaskFactoryClass(State state, Class<? extends TaskFactory> klazz) {
  String factoryClassName = klazz.getName();
  state.setProp(TASK_FACTORY_CLASS, factoryClassName);
}
/**
 * Write a {@link QueryBasedHivePublishEntity} into the given {@link State} as JSON, under
 * {@link HiveAvroORCQueryGenerator#SERIALIZED_PUBLISH_TABLE_COMMANDS}.
 *
 * @param state {@link State} to serialize the entity into
 * @param queryBasedHivePublishEntity publish commands to carry over to the publisher
 */
public static void serializePublishCommands(State state, QueryBasedHivePublishEntity queryBasedHivePublishEntity) {
  String serializedCommands = GSON.toJson(queryBasedHivePublishEntity);
  state.setProp(HiveAvroORCQueryGenerator.SERIALIZED_PUBLISH_TABLE_COMMANDS, serializedCommands);
}
/**
 * Record the number of schema-evolution DDL statements as SLA event metadata.
 *
 * @param state {@link State} receiving the metadata
 * @param evolutionDDLs schema-evolution DDL statements; {@code null} counts as zero
 */
public static void setEvolutionMetadata(State state, List<String> evolutionDDLs) {
  int ddlCount = (evolutionDDLs == null) ? 0 : evolutionDDLs.size();
  state.setProp(EventConstants.SCHEMA_EVOLUTION_DDLS_NUM, ddlCount);
}
/** Records the timestamp at which DDL building began as SLA event metadata. */
public static void setBeginDDLBuildTimeMetadata(State state, long time) {
  state.setProp(EventConstants.BEGIN_DDL_BUILD_TIME, String.valueOf(time));
}
/** Records the timestamp at which DDL building finished as SLA event metadata. */
public static void setEndDDLBuildTimeMetadata(State state, long time) {
  state.setProp(EventConstants.END_DDL_BUILD_TIME, String.valueOf(time));
}
/**
 * Serialize a {@link CopyEntity} into a {@link State} at {@link #SERIALIZED_COPYABLE_FILE},
 * also recording its concrete class name at {@link #COPY_ENTITY_CLASS}.
 *
 * @param state {@link State} to serialize the entity into
 * @param copyEntity entity to serialize
 */
public static void serializeCopyEntity(State state, CopyEntity copyEntity) {
  // Record the concrete class so the entity can be deserialized to the right type later.
  state.setProp(COPY_ENTITY_CLASS, copyEntity.getClass().getName());
  state.setProp(SERIALIZED_COPYABLE_FILE, CopyEntity.serialize(copyEntity));
}
/**
 * Copy every configuration entry of a Hadoop {@link Job} into the given {@link State},
 * prefixing each key with {@link #JOB_CONFIGURATION_PREFIX}.
 *
 * @param state {@link State} to copy the configuration into
 * @param job Hadoop job whose configuration is serialized
 */
public static void serializeJobToState(State state, Job job) {
  for (Map.Entry<String, String> confEntry : job.getConfiguration()) {
    String prefixedKey = JOB_CONFIGURATION_PREFIX + confEntry.getKey();
    state.setProp(prefixedKey, confEntry.getValue());
  }
}
/**
 * Build a new {@link State} containing every key/value pair of the given Hadoop
 * {@link Configuration}.
 *
 * @param conf configuration to convert
 * @return a {@link State} mirroring {@code conf}
 */
public static State getStateFromConf(Configuration conf) {
  State result = new State();
  for (Entry<String, String> confEntry : conf) {
    result.setProp(confEntry.getKey(), confEntry.getValue());
  }
  return result;
}
/**
 * Store an {@link FsPermission} under {@code key} as a four-digit octal string (e.g. "0755").
 *
 * @param state {@link State} to write into
 * @param key property key to store the permission under
 * @param fsPermissions permission to serialize
 */
private static void serializeFsPermissions(State state, String key, FsPermission fsPermissions) {
  String octalForm = String.format("%04o", fsPermissions.toShort());
  state.setProp(key, octalForm);
}
/**
 * Record, for one fork branch, the octal permission string the writer should use when creating
 * directories. Read back via {@link #deserializeWriterDirPermissions(State, int, int)}.
 *
 * @param state {@link State} to write into
 * @param numBranches total number of fork branches
 * @param branchId branch this permission applies to
 * @param octalPermissions permission string in octal notation
 */
public static void setWriterDirOctalPermissions(State state, int numBranches, int branchId, String octalPermissions) {
  String branchKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DIR_PERMISSIONS, numBranches, branchId);
  state.setProp(branchKey, octalPermissions);
}
/**
 * Serialize a {@link CopyableDatasetMetadata} into a {@link State} at
 * {@link #SERIALIZED_COPYABLE_DATASET}.
 *
 * @param state {@link State} to serialize the dataset into
 * @param copyableDataset dataset metadata to serialize
 */
public static void serializeCopyableDataset(State state, CopyableDatasetMetadata copyableDataset) {
  String serialized = copyableDataset.serialize();
  state.setProp(SERIALIZED_COPYABLE_DATASET, serialized);
}
/**
 * Record, for one fork branch, the octal permission string the writer should use when creating
 * files. Read back via {@link #deserializeWriterFilePermissions(State, int, int)}.
 *
 * @param state {@link State} to write into
 * @param numBranches total number of fork branches
 * @param branchId branch this permission applies to
 * @param octalPermissions permission string in octal notation
 */
public static void setWriterFileOctalPermissions(State state, int numBranches, int branchId, String octalPermissions) {
  String branchKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_PERMISSIONS, numBranches, branchId);
  state.setProp(branchKey, octalPermissions);
}
/**
 * Store a unique, replicable guid for this work unit under {@link #WORK_UNIT_GUID}; used for
 * recovering partially successful work units.
 *
 * @param state {@link State} where the guid should be written
 * @param guid guid to record
 */
public static void setWorkUnitGuid(State state, Guid guid) {
  String guidString = guid.toString();
  state.setProp(WORK_UNIT_GUID, guidString);
}
/**
 * Persist the URN of a {@link FileSystemDataset} into the given {@link State} under
 * {@link #SERIALIZE_COMPACTION_FILE_PATH_NAME}.
 *
 * @param dataset dataset whose URN is recorded
 * @param state {@link State} used to save the dataset identity
 */
public void save(FileSystemDataset dataset, State state) {
  String datasetUrn = dataset.datasetURN();
  state.setProp(SERIALIZE_COMPACTION_FILE_PATH_NAME, datasetUrn);
}
/**
 * Extract all SerDe parameters from a {@link SerDeInfo} into a new {@link State}, also
 * recording the serialization library under {@link HiveConstants#SERDE_TYPE} when it is set.
 *
 * @param si SerDe descriptor to read
 * @return a {@link State} holding the SerDe properties
 */
private static State getSerDeProps(SerDeInfo si) {
  State props = new State();
  for (Map.Entry<String, String> param : si.getParameters().entrySet()) {
    props.setProp(param.getKey(), param.getValue());
  }
  if (si.isSetSerializationLib()) {
    props.setProp(HiveConstants.SERDE_TYPE, si.getSerializationLib());
  }
  return props;
}
public static void modifyDatasetStateToRecompact (Dataset dataset) { // Modify the dataset for recompaction LOG.info ("{} changes to recompact mode", dataset.getDatasetName()); State recompactState = new State(); recompactState.setProp(MRCompactor.COMPACTION_RECOMPACT_FROM_DEST_PATHS, Boolean.TRUE); recompactState.setProp(MRCompactor.COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK, Boolean.FALSE); dataset.modifyDatasetForRecompact(recompactState); dataset.setState(VERIFIED); }
/**
 * Copy SLA event metadata (dataset urn, destination partition, dedupe status, previous
 * publish time, record count) onto the dataset's job props.
 *
 * @deprecated use {@link #getEventSubmitterBuilder(Dataset, Optional, FileSystem)} instead
 */
@Deprecated
public static void populateState(Dataset dataset, Optional<Job> job, FileSystem fs) {
  dataset.jobProps().setProp(SlaEventKeys.DATASET_URN_KEY, dataset.getUrn());
  String destPartition = dataset.jobProps().getProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, "");
  dataset.jobProps().setProp(SlaEventKeys.PARTITION_KEY, destPartition);
  dataset.jobProps().setProp(SlaEventKeys.DEDUPE_STATUS_KEY, getOutputDedupeStatus(dataset.jobProps()));
  dataset.jobProps().setProp(SlaEventKeys.PREVIOUS_PUBLISH_TS_IN_MILLI_SECS_KEY, getPreviousPublishTime(dataset, fs));
  dataset.jobProps().setProp(SlaEventKeys.RECORD_COUNT_KEY, getRecordCount(job));
}
/**
 * Create a {@link CheckpointableWatermarkState} from a {@link CheckpointableWatermark}: the
 * watermark is stored as a JSON tree keyed by its source, which also becomes this state's id.
 *
 * @param watermark the checkpointable watermark to serialize
 * @param gson {@link Gson} instance used to serialize {@code watermark}
 */
public CheckpointableWatermarkState(CheckpointableWatermark watermark, Gson gson) {
  String source = watermark.getSource();
  super.setProp(source, gson.toJsonTree(watermark));
  super.setId(source);
}
private void addInputLateFilesForFirstTimeCompaction(State jobProps, Dataset dataset) throws IOException { if ((latePathsFound(dataset)) && (this.outputDeduplicated)) { dataset.addAdditionalInputPaths(dataset.inputLatePaths()); if (this.outputDeduplicated) { // If input contains late data (i.e., input data is not deduplicated) and output data should be deduplicated, // run a deduping compaction instead of non-deduping compaction. jobProps.setProp(MRCompactor.COMPACTION_SHOULD_DEDUPLICATE, true); } } }
@Override public final DataWriter<FileAwareInputStream> build() throws IOException { setJobSpecificOutputPaths(this.destination.getProperties()); // Each writer/mapper gets its own task-staging directory this.destination.getProperties().setProp(ConfigurationKeys.WRITER_FILE_PATH, this.writerId); return buildWriter(); }