private int getThreadPoolSize() {
  return this.state.getPropAsInt(COMPACTION_THREAD_POOL_SIZE, DEFAULT_COMPACTION_THREAD_POOL_SIZE);
}
/**
 * Get the number of {@link Dataset}s to be verified together. This allows multiple {@link Dataset}s
 * to share the same verification job, e.g., share the same query.
 */
private int getNumDatasetsVerifiedTogether() {
  return this.state.getPropAsInt(COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER,
      DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER);
}
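// A hedged configuration sketch for the method above. The literal key is
// illustrative; the real key is whatever the
// COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER constant
// resolves to. With a value of 10, up to ten datasets share one verification query.
State state = new State();
state.setProp("compaction.completeness.verification.num.datasets.verified.together", "10");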
private int getDataCompletenessVerificationThreadPoolSize() {
  return this.props.getPropAsInt(COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE,
      DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE);
}
/**
 * Constructor
 */
public PinotAuditCountHttpClient(State state) {
  int maxTotal = state.getPropAsInt(CONNECTION_MAX_TOTAL, DEFAULT_CONNECTION_MAX_TOTAL);
  int maxPerRoute = state.getPropAsInt(MAX_PER_ROUTE, DEFAULT_MAX_PER_ROUTE);

  cm = new PoolingHttpClientConnectionManager();
  cm.setMaxTotal(maxTotal);
  cm.setDefaultMaxPerRoute(maxPerRoute);
  httpClient = HttpClients.custom()
      .setConnectionManager(cm)
      .build();

  String host = state.getProp(TARGET_HOST);
  int port = state.getPropAsInt(TARGET_PORT);
  targetUrl = host + ":" + port + "/pql?pql=";
}
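// Illustrative only: with TARGET_HOST set to "http://pinot-broker.example.com"
// (a hypothetical host) and TARGET_PORT set to 8099, the constructor yields
//   targetUrl == "http://pinot-broker.example.com:8099/pql?pql="
// so a URL-encoded PQL query can be appended directly onto targetUrl.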
public RecoveryHelper(FileSystem fs, State state) throws IOException {
  this.fs = fs;
  this.persistDir = getPersistDir(state);
  this.retentionHours = state.getPropAsInt(PERSIST_RETENTION_KEY, DEFAULT_PERSIST_RETENTION);
}
private int getOwnFileCountThreshold(Dataset dataset) {
  return dataset.jobProps().getPropAsInt(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_FILE_NUM,
      MRCompactor.DEFAULT_COMPACTION_LATEDATA_THRESHOLD_FILE_NUM);
}
private int getMaxNumReducers() {
  return this.dataset.jobProps().getPropAsInt(COMPACTION_JOB_MAX_NUM_REDUCERS,
      DEFAULT_COMPACTION_JOB_MAX_NUM_REDUCERS);
}
/**
 * Constructor
 */
public KafkaAuditCountHttpClient(State state) {
  int maxTotal = state.getPropAsInt(CONNECTION_MAX_TOTAL, DEFAULT_CONNECTION_MAX_TOTAL);
  int maxPerRoute = state.getPropAsInt(MAX_PER_ROUTE, DEFAULT_MAX_PER_ROUTE);

  cm = new PoolingHttpClientConnectionManager();
  cm.setMaxTotal(maxTotal);
  cm.setDefaultMaxPerRoute(maxPerRoute);
  httpClient = HttpClients.custom()
      .setConnectionManager(cm)
      .build();

  this.baseUrl = state.getProp(KAFKA_AUDIT_REST_BASE_URL);
  this.maxNumTries = state.getPropAsInt(KAFKA_AUDIT_REST_MAX_TRIES, 5);
  this.startQueryString = state.getProp(KAFKA_AUDIT_REST_START_QUERYSTRING_KEY,
      KAFKA_AUDIT_REST_START_QUERYSTRING_DEFAULT);
  this.endQueryString = state.getProp(KAFKA_AUDIT_REST_END_QUERYSTRING_KEY,
      KAFKA_AUDIT_REST_END_QUERYSTRING_DEFAULT);
}
/**
 * Cleanup staging data of a Gobblin task.
 *
 * @param state a {@link State} instance storing task configuration properties
 * @param logger a {@link Logger} used for logging
 */
public static void cleanTaskStagingData(State state, Logger logger) throws IOException {
  int numBranches = state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);

  for (int branchId = 0; branchId < numBranches; branchId++) {
    String writerFsUri = state.getProp(
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId),
        ConfigurationKeys.LOCAL_FS_URI);
    FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

    Path stagingPath = WriterUtils.getWriterStagingDir(state, numBranches, branchId);
    if (fs.exists(stagingPath)) {
      logger.info("Cleaning up staging directory " + stagingPath.toUri().getPath());
      if (!fs.delete(stagingPath, true)) {
        throw new IOException("Clean up staging directory " + stagingPath.toUri().getPath() + " failed");
      }
    }

    Path outputPath = WriterUtils.getWriterOutputDir(state, numBranches, branchId);
    if (fs.exists(outputPath)) {
      logger.info("Cleaning up output directory " + outputPath.toUri().getPath());
      if (!fs.delete(outputPath, true)) {
        throw new IOException("Clean up output directory " + outputPath.toUri().getPath() + " failed");
      }
    }
  }
}
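// A minimal usage sketch, assuming a task-level State configured for two fork
// branches. The writer staging and output directories for branch 0 and branch 1
// are deleted if they exist.
State taskState = new State();
taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");
cleanTaskStagingData(taskState, LoggerFactory.getLogger("staging-cleanup"));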
/**
 * Calls {@link #getOptionallyThrottledFileSystem(FileSystem, int)}, parsing the qps from the input {@link State}
 * at key {@link #MAX_FILESYSTEM_QPS}.
 * @throws IOException
 */
public static FileSystem getOptionallyThrottledFileSystem(FileSystem fs, State state) throws IOException {
  DeprecationUtils.renameDeprecatedKeys(state, MAX_FILESYSTEM_QPS, DEPRECATED_KEYS);

  if (state.contains(MAX_FILESYSTEM_QPS)) {
    return getOptionallyThrottledFileSystem(fs, state.getPropAsInt(MAX_FILESYSTEM_QPS));
  }
  return fs;
}
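// A hedged usage sketch: when the QPS key is present, calls through the
// returned FileSystem are rate-limited; otherwise the original instance comes
// back unchanged. MAX_FILESYSTEM_QPS is the constant referenced above; rawFs
// is a placeholder for an already-constructed FileSystem.
State state = new State();
state.setProp(MAX_FILESYSTEM_QPS, "100"); // cap filesystem calls at ~100 per second
FileSystem throttledFs = getOptionallyThrottledFileSystem(rawFs, state);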
public HiveAvroSerDeManager(State props) throws IOException {
  super(props);
  this.fs = FileSystem.get(HadoopUtils.getConfFromState(props));
  this.useSchemaFile = props.getPropAsBoolean(USE_SCHEMA_FILE, DEFAULT_USE_SCHEMA_FILE);
  this.schemaFileName = props.getProp(SCHEMA_FILE_NAME, DEFAULT_SCHEMA_FILE_NAME);
  this.schemaLiteralLengthLimit = props.getPropAsInt(SCHEMA_LITERAL_LENGTH_LIMIT, DEFAULT_SCHEMA_LITERAL_LENGTH_LIMIT);
  this.metricContext = Instrumented.getMetricContext(props, HiveAvroSerDeManager.class);
}
int numBranches = state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);
// Capture the parallel-runner thread count in a local (name illustrative)
// rather than discarding the lookup result as the original fragment did.
int numThreads = state.getPropAsInt(ParallelRunner.PARALLEL_RUNNER_THREADS_KEY,
    ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS);
/**
 * Refer to {@link MRCompactorAvroKeyDedupJobRunner#setNumberOfReducers(Job)}.
 */
protected void setNumberOfReducers(Job job) throws IOException {
  // Total input size across all map-reduce input paths.
  long inputSize = 0;
  for (Path inputPath : this.mapReduceInputPaths) {
    inputSize += this.fs.getContentSummary(inputPath).getLength();
  }

  // Target size of each output file.
  long targetFileSize = this.state.getPropAsLong(
      MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_TARGET_OUTPUT_FILE_SIZE,
      MRCompactorAvroKeyDedupJobRunner.DEFAULT_COMPACTION_JOB_TARGET_OUTPUT_FILE_SIZE);

  // Upper bound on the number of reducers.
  int maxNumReducers = state.getPropAsInt(
      MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_MAX_NUM_REDUCERS,
      MRCompactorAvroKeyDedupJobRunner.DEFAULT_COMPACTION_JOB_MAX_NUM_REDUCERS);

  int numReducers = Math.min(Ints.checkedCast(inputSize / targetFileSize) + 1, maxNumReducers);

  // Optionally round the reducer count up to the next prime.
  boolean usePrimeReducers = state.getPropAsBoolean(
      MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_USE_PRIME_REDUCERS,
      MRCompactorAvroKeyDedupJobRunner.DEFAULT_COMPACTION_JOB_USE_PRIME_REDUCERS);
  if (usePrimeReducers && numReducers != 1) {
    numReducers = Primes.nextPrime(numReducers);
  }
  job.setNumReduceTasks(numReducers);
}
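// A worked example of the sizing math above, with illustrative numbers:
//   inputSize      = 10 GiB (10,737,418,240 bytes)
//   targetFileSize = 512 MiB (536,870,912 bytes)
//   maxNumReducers = 900
// inputSize / targetFileSize = 20, so numReducers = min(20 + 1, 900) = 21;
// with usePrimeReducers enabled, Primes.nextPrime(21) yields 23 reducers.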
// Maximum lookback in days, padded by the persist-watermark buffer; the bare
// expression in the original fragment is bound to a local (name illustrative)
// so it compiles.
int maxLookbackDays = state.getPropAsInt(HiveSource.HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY,
    HiveSource.DEFAULT_HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS) + BUFFER_WATERMARK_DAYS_TO_PERSIST;
public FileAwareInputStreamDataWriter(State state, int numBranches, int branchId, String writerAttemptId)
    throws IOException {
  super(state);

  if (numBranches > 1) {
    throw new IOException("Distcp can only operate with one branch.");
  }
  if (!(state instanceof WorkUnitState)) {
    throw new RuntimeException(String.format("Distcp requires a %s on construction.",
        WorkUnitState.class.getSimpleName()));
  }

  this.state = (WorkUnitState) state;
  this.taskBroker = this.state.getTaskBroker();
  this.writerAttemptIdOptional = Optional.fromNullable(writerAttemptId);

  String uri = this.state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId),
      ConfigurationKeys.LOCAL_FS_URI);
  this.fs = FileSystem.get(URI.create(uri), WriterUtils.getFsConfiguration(state));
  this.stagingDir = this.writerAttemptIdOptional.isPresent()
      ? WriterUtils.getWriterStagingDir(state, numBranches, branchId, this.writerAttemptIdOptional.get())
      : WriterUtils.getWriterStagingDir(state, numBranches, branchId);
  this.outputDir = getOutputDir(state);
  this.copyableDatasetMetadata =
      CopyableDatasetMetadata.deserialize(state.getProp(CopySource.SERIALIZED_COPYABLE_DATASET));
  this.recoveryHelper = new RecoveryHelper(this.fs, state);
  this.actualProcessedCopyableFile = Optional.absent();
  this.copySpeedMeter = getMetricContext().meter(GOBBLIN_COPY_BYTES_COPIED_METER);
  this.bufferSize = state.getPropAsInt(CopyConfiguration.BUFFER_SIZE, StreamCopier.DEFAULT_BUFFER_SIZE);
  this.encryptionConfig = EncryptionConfigParser.getConfigForBranch(
      EncryptionConfigParser.EntityType.WRITER, this.state, numBranches, branchId);
}
@SuppressWarnings({ "serial", "unchecked" }) protected static <T> Optional<T> populateField(State state, String key, TypeToken<T> token) { if (state.contains(key)) { Optional<T> fieldValue; if (new TypeToken<Boolean>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsBoolean(key)); } else if (new TypeToken<Integer>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsInt(key)); } else if (new TypeToken<Long>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsLong(key)); } else if (new TypeToken<List<String>>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsList(key)); } else { fieldValue = (Optional<T>) Optional.of(state.getProp(key)); } state.removeProp(key); return fieldValue; } return Optional.<T> absent(); }
email.setHostName(state.getProp(ConfigurationKeys.EMAIL_HOST_KEY, ConfigurationKeys.DEFAULT_EMAIL_HOST));
if (state.contains(ConfigurationKeys.EMAIL_SMTP_PORT_KEY)) {
  email.setSmtpPort(state.getPropAsInt(ConfigurationKeys.EMAIL_SMTP_PORT_KEY));
}
/**
 * Convert a {@link HivePartition} into a {@link Partition}.
 */
public static Partition getPartition(HivePartition hivePartition) {
  State props = hivePartition.getProps();

  Partition partition = new Partition();
  partition.setDbName(hivePartition.getDbName());
  partition.setTableName(hivePartition.getTableName());
  partition.setValues(hivePartition.getValues());
  partition.setParameters(getParameters(props));

  if (hivePartition.getCreateTime().isPresent()) {
    partition.setCreateTime(Ints.checkedCast(hivePartition.getCreateTime().get()));
  } else if (props.contains(HiveConstants.CREATE_TIME)) {
    partition.setCreateTime(props.getPropAsInt(HiveConstants.CREATE_TIME));
  }
  if (props.contains(HiveConstants.LAST_ACCESS_TIME)) {
    partition.setLastAccessTime(props.getPropAsInt(HiveConstants.LAST_ACCESS_TIME));
  }

  partition.setSd(getStorageDescriptor(hivePartition));
  return partition;
}
// ... (start of the builder chain truncated in the original snippet)
    MRCompactor.COMPACTION_TRACKING_EVENTS_NAMESPACE).build();
this.copyLateDataThreadPoolSize = this.dataset.jobProps().getPropAsInt(
    COMPACTION_COPY_LATE_DATA_THREAD_POOL_SIZE, DEFAULT_COMPACTION_COPY_LATE_DATA_THREAD_POOL_SIZE);