/**
 * Return whether the current publisher may be skipped.
 *
 * <p>
 * A skippable publisher must have no effect on state persistence. It is skipped when a job is
 * cancelled and all finished tasks are configured to be committed.
 * </p>
 */
public boolean canBeSkipped() {
  boolean skippable = this.state.getPropAsBoolean(ConfigurationKeys.DATA_PUBLISHER_CAN_BE_SKIPPED,
      ConfigurationKeys.DEFAULT_DATA_PUBLISHER_CAN_BE_SKIPPED);
  return skippable;
}
/**
 * Whether recompaction from destination paths is enabled, per
 * {@code MRCompactor.COMPACTION_RECOMPACT_FROM_DEST_PATHS} in the job state.
 */
private boolean getRecompactDatasets() {
  final String key = MRCompactor.COMPACTION_RECOMPACT_FROM_DEST_PATHS;
  return this.state.getPropAsBoolean(key, MRCompactor.DEFAULT_COMPACTION_RECOMPACT_FROM_DEST_PATHS);
}
/**
 * Whether data should still be published when completeness verification cannot be performed,
 * per {@code COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY} in the state.
 */
private boolean shouldPublishDataIfCannotVerifyCompl() {
  final String key = COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY;
  return this.state.getPropAsBoolean(key,
      DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY);
}
/**
 * Whether the strong encryptor should be used, per
 * {@code ConfigurationKeys.ENCRYPT_USE_STRONG_ENCRYPTOR} in the given {@code state}.
 */
private static boolean shouldUseStrongEncryptor(State state) {
  final String key = ConfigurationKeys.ENCRYPT_USE_STRONG_ENCRYPTOR;
  return state.getPropAsBoolean(key, ConfigurationKeys.DEFAULT_ENCRYPT_USE_STRONG_ENCRYPTOR);
}
/**
 * Whether data completeness verification is enabled, per
 * {@code COMPACTION_COMPLETENESS_VERIFICATION_ENABLED} in the state.
 */
private boolean shouldVerifyDataCompleteness() {
  boolean enabled = this.state.getPropAsBoolean(COMPACTION_COMPLETENESS_VERIFICATION_ENABLED,
      DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_ENABLED);
  return enabled;
}
/**
 * Wraps the writer produced by {@code writerSupplier}, configuring close-on-flush and
 * close-on-metadata-update behavior from the supplied {@code state}.
 *
 * <p>Note: the supplier is invoked eagerly here to create the initial writer.</p>
 *
 * @param writerSupplier factory for the underlying {@link DataWriter}; must not be null
 * @param state configuration source; must not be null
 */
public CloseOnFlushWriterWrapper(Supplier<DataWriter<D>> writerSupplier, State state) {
  Preconditions.checkNotNull(state, "State is required.");
  // Validate the supplier too: it is dereferenced immediately below, and failing here gives a
  // clear message instead of a bare NPE from writerSupplier.get().
  Preconditions.checkNotNull(writerSupplier, "Writer supplier is required.");
  this.state = state;
  this.writerSupplier = writerSupplier;
  this.writer = writerSupplier.get();
  this.closed = false;
  this.closeOnFlush = this.state.getPropAsBoolean(WRITER_CLOSE_ON_FLUSH_KEY,
      DEFAULT_WRITER_CLOSE_ON_FLUSH);
  this.controlMessageHandler = new CloseOnFlushWriterMessageHandler();
  this.closeOnMetadataUpdate = this.state.getPropAsBoolean(WRITER_CLOSE_ON_METADATA_UPDATE,
      DEFAULT_CLOSE_ON_METADATA_UPDATE);
}
public HivePartitionVersionRetentionCleaner(CleanableDataset dataset, DatasetVersion version, List<String> nonDeletableVersionLocations, State state) { super(dataset, version, nonDeletableVersionLocations, state); this.versionOwner = ((HivePartitionRetentionVersion) this.datasetVersion).getOwner(); this.simulate = this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE, ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE); }
/** * Constructor * @param state A task level state */ public CompactionAvroJobConfigurator(State state) throws IOException { this.state = state; this.fs = getFileSystem(state); this.shouldDeduplicate = state.getPropAsBoolean(MRCompactor.COMPACTION_SHOULD_DEDUPLICATE, true); }
public MRCompactorAvroKeyDedupJobRunner(Dataset dataset, FileSystem fs) { super(dataset, fs); this.useSingleInputSchema = this.dataset.jobProps().getPropAsBoolean(COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true); }
/**
 * Serializes the current partition (if any) into a path string.
 *
 * @param properties source of the branch-scoped writer options
 * @return the serialized partition path, or {@code null} when no partition is present
 */
protected String getPartitionPath(State properties) {
  if (!this.partition.isPresent()) {
    return null;
  }
  String includeNamesKey = ForkOperatorUtils.getPropertyNameForBranch(
      WRITER_INCLUDE_PARTITION_IN_FILE_NAMES, this.branches, this.branch);
  String stripSeparatorsKey = ForkOperatorUtils.getPropertyNameForBranch(
      WRITER_REPLACE_PATH_SEPARATORS_IN_PARTITIONS, this.branches, this.branch);
  boolean includePartitionerFieldNames = properties.getPropAsBoolean(includeNamesKey, false);
  boolean removePathSeparators = properties.getPropAsBoolean(stripSeparatorsKey, false);
  return AvroUtils
      .serializeAsPath(this.partition.get(), includePartitionerFieldNames, removePathSeparators)
      .toString();
}
/**
 * Moves {@code src} to {@code dst} for the given branch via the parallel runner,
 * recording the resulting publisher output directories.
 *
 * @throws IOException if recording the output dirs or scheduling the move fails
 */
protected void movePath(ParallelRunner parallelRunner, State state, Path src, Path dst, int branchId)
    throws IOException {
  LOG.info(String.format("Moving %s to %s", src, dst));
  this.publisherOutputDirs.addAll(recordPublisherOutputDirs(src, dst, branchId));
  boolean overwrite =
      state.getPropAsBoolean(ConfigurationKeys.DATA_PUBLISHER_OVERWRITE_ENABLED, false);
  parallelRunner.movePath(src, this.publisherFileSystemByBranches.get(branchId), dst, overwrite,
      this.publisherFinalDirOwnerGroupsByBranches.get(branchId));
}
public HivePartitionVersionRetentionReaper(CleanableDataset dataset, DatasetVersion version, List<String> nonDeletableVersionLocations, State state) { super(dataset, version, nonDeletableVersionLocations, state); this.versionOwner = ((HivePartitionVersion) this.datasetVersion).getOwner(); Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_OWNER), "Missing required property " + ComplianceConfigurationKeys.BACKUP_OWNER); this.backUpOwner = Optional.fromNullable(this.state.getProp(ComplianceConfigurationKeys.BACKUP_OWNER)); this.simulate = this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE, ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE); }
/**
 * Maps the {@code COMPACTION_OUTPUT_DEDUPLICATED} flag in the state to the
 * corresponding {@link DedupeStatus} name.
 */
private static String getOutputDedupeStatus(State state) {
  boolean deduped = state.getPropAsBoolean(MRCompactor.COMPACTION_OUTPUT_DEDUPLICATED,
      MRCompactor.DEFAULT_COMPACTION_OUTPUT_DEDUPLICATED);
  if (deduped) {
    return DedupeStatus.DEDUPED.toString();
  }
  return DedupeStatus.NOT_DEDUPED.toString();
}
/**
 * Whether writer metadata should be published for the given branch, per the
 * branch-scoped {@code DATA_PUBLISH_WRITER_METADATA_KEY} property (defaults to false).
 */
private boolean shouldPublishWriterMetadataForBranch(int branchId) {
  String branchKey = ForkOperatorUtils.getPropertyNameForBranch(
      ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, this.numBranches, branchId);
  return this.getState().getPropAsBoolean(branchKey, false);
}
/**
 * A {@link Dataset} should be verified if it is not already compacted and it satisfies
 * the blacklist and whitelist.
 */
private boolean shouldVerifyCompletenessForDataset(Dataset dataset, List<Pattern> blacklist,
    List<Pattern> whitelist) {
  boolean renamingRequired = this.state.getPropAsBoolean(COMPACTION_RENAME_SOURCE_DIR_ENABLED,
      DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);
  LOG.info("Should verify completeness with renaming source dir : " + renamingRequired);
  boolean notYetCompacted = !datasetAlreadyCompacted(this.fs, dataset, renamingRequired);
  return notYetCompacted && DatasetFilterUtils.survived(dataset.getName(), blacklist, whitelist);
}
/** * @param state This is a Job State */ public HiveRegistrationPublisher(State state) { super(state); this.hiveRegister = this.closer.register(HiveRegister.get(state)); this.hivePolicyExecutor = ExecutorsUtils.loggingDecorator(Executors.newFixedThreadPool(new HiveRegProps(state).getNumThreads(), ExecutorsUtils.newThreadFactory(Optional.of(log), Optional.of("HivePolicyExecutor-%d")))); this.metricContext = Instrumented.getMetricContext(state, HiveRegistrationPublisher.class); isPathDedupeEnabled = state.getPropAsBoolean(PATH_DEDUPE_ENABLED, this.DEFAULT_PATH_DEDUPE_ENABLED); }
private void createFileSystem(String uri) throws IOException, InterruptedException, URISyntaxException { if (this.state.getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER, ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER)) { // Initialize file system as a proxy user. this.fs = new ProxiedFileSystemWrapper().getProxiedFileSystem(this.state, ProxiedFileSystemWrapper.AuthType.TOKEN, this.state.getProp(ConfigurationKeys.FS_PROXY_AS_USER_TOKEN_FILE), uri, configuration); } else { // Initialize file system as the current user. this.fs = FileSystem.newInstance(URI.create(uri), this.configuration); } }
/**
 * Configures the Avro schemas on the MR job from the newest schema found in the source.
 * No-op when no schema can be found.
 */
private void configureSchema(Job job) throws IOException {
  Schema newestSchema = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);
  if (newestSchema == null) {
    return;
  }
  boolean singleInputSchema = this.state.getPropAsBoolean(
      MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true);
  if (singleInputSchema) {
    AvroJob.setInputKeySchema(job, newestSchema);
  }
  // When deduplicating, the map output key is the dedup key schema; otherwise the full record.
  Schema mapOutputKeySchema =
      this.shouldDeduplicate ? getKeySchema(job, newestSchema) : newestSchema;
  AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);
  AvroJob.setMapOutputValueSchema(job, newestSchema);
  AvroJob.setOutputKeySchema(job, newestSchema);
}
/**
 * Copies the compliance proxy configuration into the generic FS proxy settings on the state.
 * No-op when compliance proxying is disabled.
 */
public static void setProxySettingsForFs(State state) {
  boolean shouldProxy = state.getPropAsBoolean(
      ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_SHOULD_PROXY,
      ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DEFAULT_SHOULD_PROXY);
  if (!shouldProxy) {
    return;
  }
  String proxyUser = state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_PROXY_USER);
  String superUser = state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_SUPER_USER);
  String realm = state.getProp(ConfigurationKeys.KERBEROS_REALM);
  state.setProp(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER, true);
  state.setProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME, proxyUser);
  // The super user proxies on behalf of others; its principal is derived from host and realm.
  state.setProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS,
      HostUtils.getPrincipalUsingHostname(superUser, realm));
  state.setProp(ConfigurationKeys.FS_PROXY_AUTH_METHOD, ConfigurationKeys.KERBEROS_AUTH);
}
/**
 * Builds a metastore-based register: reads the optimized-check flag from the state,
 * obtains a shared metastore client pool, and wires up metrics and event submission.
 *
 * @param state job state supplying configuration and metric tags
 * @param metastoreURI optional metastore URI; absent means the default metastore
 * @throws IOException if the register superclass fails to initialize
 */
public HiveMetaStoreBasedRegister(State state, Optional<String> metastoreURI) throws IOException {
  super(state);
  this.optimizedChecks = state.getPropAsBoolean(OPTIMIZED_CHECK_ENABLED, true);
  // Removed a locally-constructed GenericObjectPoolConfig that was configured but never
  // passed to anything: the client pool below is built solely from this.props.
  this.clientPool = HiveMetastoreClientPool.get(this.props.getProperties(), metastoreURI);
  this.metricContext = GobblinMetricsRegistry.getInstance().getMetricContext(state,
      HiveMetaStoreBasedRegister.class, GobblinMetrics.getCustomTagsFromState(state));
  this.eventSubmitter = new EventSubmitter.Builder(this.metricContext,
      "org.apache.gobblin.hive.HiveMetaStoreBasedRegister").build();
}