public RowCountPolicy(State state, TaskLevelPolicy.Type type) {
  super(state, type);
  this.rowsRead = state.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED);
  this.rowsWritten = state.getPropAsLong(ConfigurationKeys.WRITER_ROWS_WRITTEN);
}
/**
 * Get the interval at which the writer metrics updater should be scheduled.
 */
private static long getWriterMetricsUpdaterInterval(State state) {
  return state.getPropAsLong(WRITER_METRICS_UPDATER_INTERVAL, DEFAULT_WRITER_METRICS_UPDATER_INTERVAL);
}
private long getDataVerifTimeoutMinutes() {
  return this.state.getPropAsLong(COMPACTION_VERIFICATION_TIMEOUT_MINUTES,
      DEFAULT_COMPACTION_VERIFICATION_TIMEOUT_MINUTES);
}
private long getCompactionTimeoutMinutes() {
  return this.state.getPropAsLong(COMPACTION_MR_JOB_TIMEOUT_MINUTES,
      DEFAULT_COMPACTION_MR_JOB_TIMEOUT_MINUTES);
}
private long calculateTimestamp(State state) {
  long timestamp = state.getPropAsLong(ConfigurationKeys.WORK_UNIT_DATE_PARTITION_KEY, -1L);
  if (timestamp == -1L) {
    throw new IllegalArgumentException(
        "WORK_UNIT_DATE_PARTITION_KEY not present in WorkUnitState; is source an instance of DatePartitionedAvroFileSource?");
  }
  return timestamp;
}
public RowCountRangePolicy(State state, Type type) {
  super(state, type);
  this.rowsRead = state.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED);
  this.rowsWritten = state.getPropAsLong(ConfigurationKeys.WRITER_ROWS_WRITTEN);
  this.range = state.getPropAsDouble(ConfigurationKeys.ROW_COUNT_RANGE);
}
private long getTargetFileSize() {
  return this.dataset.jobProps().getPropAsLong(COMPACTION_JOB_TARGET_OUTPUT_FILE_SIZE,
      DEFAULT_COMPACTION_JOB_TARGET_OUTPUT_FILE_SIZE);
}
@Override
Object convertField(JsonElement value) {
  GenericRecord avroRecord = new GenericData.Record(_schema);
  long maxFailedConversions = this.workUnit.getPropAsLong(ConfigurationKeys.CONVERTER_AVRO_MAX_CONVERSION_FAILURES,
      ConfigurationKeys.DEFAULT_CONVERTER_AVRO_MAX_CONVERSION_FAILURES);
  for (Map.Entry<String, JsonElement> entry : ((JsonObject) value).entrySet()) {
    try {
      avroRecord.put(entry.getKey(), this.converters.get(entry.getKey()).convert(entry.getValue()));
    } catch (Exception e) {
      this.numFailedConversion++;
      if (this.numFailedConversion < maxFailedConversions) {
        // Still under the failure budget: log and drop this record instead of failing the task.
        LOG.error("Dropping record " + value + " because it cannot be converted to Avro", e);
        return new EmptyIterable<>();
      }
      throw new RuntimeException(
          "Unable to convert field:" + entry.getKey() + " for value:" + entry.getValue() + " for record: " + value, e);
    }
  }
  return avroRecord;
}
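// A minimal configuration sketch: raise the failure budget checked above (the value 100 is
// hypothetical; the default comes from DEFAULT_CONVERTER_AVRO_MAX_CONVERSION_FAILURES). Once
// numFailedConversion reaches this limit, the converter stops dropping records and throws.
workUnit.setProp(ConfigurationKeys.CONVERTER_AVRO_MAX_CONVERSION_FAILURES, 100L);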
public RowLevelPolicyChecker(List<RowLevelPolicy> list, String stateId, FileSystem fs, State state) {
  this.list = list;
  this.stateId = stateId;
  this.fs = fs;
  this.errFileOpen = false;
  this.closer = Closer.create();
  this.writer = this.closer.register(new RowLevelErrFileWriter(this.fs));
  this.results = new RowLevelPolicyCheckResults();
  this.sampler = new FrontLoadedSampler(state.getPropAsLong(ConfigurationKeys.ROW_LEVEL_ERR_FILE_RECORDS_PER_TASK,
      ConfigurationKeys.DEFAULT_ROW_LEVEL_ERR_FILE_RECORDS_PER_TASK), 1.5);
}
/**
 * Get the average record size of a partition, which is stored in the property
 * "[topicname].[partitionid].avg.record.size". No default is supplied, so the property
 * must be present in the given {@link State}.
 */
public static long getPartitionAvgRecordSize(State state, KafkaPartition partition) {
  return state.getPropAsLong(
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE);
}
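// A minimal sketch (topic, partition, and key literal hypothetical; the real key is built by
// getPartitionPropName): for topic "events" and partition 3, the javadoc's pattern yields
// "events.3.avg.record.size".
State state = new State();
state.setProp("events.3.avg.record.size", 1024L);
// getPartitionAvgRecordSize(state, partition) then returns 1024 for that partition.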
private void setCompactionSLATimestamp(Dataset dataset) {
  // Set the SLA timestamp only if this dataset will be compacted and MRCompactor.COMPACTION_INPUT_PATH_TIME is present.
  if ((this.recompactFromOutputPaths || !MRCompactor.datasetAlreadyCompacted(this.fs, dataset, this.renameSourceDirEnabled))
      && dataset.jobProps().contains(MRCompactor.COMPACTION_INPUT_PATH_TIME)) {
    long timeInMills = dataset.jobProps().getPropAsLong(MRCompactor.COMPACTION_INPUT_PATH_TIME);
    // Set the upstream time to the partition time + 1 day, e.g. for 2015/10/13 the upstream time is midnight of 2015/10/14.
    CompactionSlaEventHelper.setUpstreamTimeStamp(this.state,
        timeInMills + TimeUnit.MILLISECONDS.convert(1, TimeUnit.DAYS));
  }
}
/**
 * Refer to {@link MRCompactorAvroKeyDedupJobRunner#setNumberOfReducers(Job)}.
 */
protected void setNumberOfReducers(Job job) throws IOException {
  // Compute the total input size.
  long inputSize = 0;
  for (Path inputPath : this.mapReduceInputPaths) {
    inputSize += this.fs.getContentSummary(inputPath).getLength();
  }
  // Get the target output file size.
  long targetFileSize = this.state.getPropAsLong(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_TARGET_OUTPUT_FILE_SIZE,
      MRCompactorAvroKeyDedupJobRunner.DEFAULT_COMPACTION_JOB_TARGET_OUTPUT_FILE_SIZE);
  // Get the maximum number of reducers.
  int maxNumReducers = state.getPropAsInt(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_MAX_NUM_REDUCERS,
      MRCompactorAvroKeyDedupJobRunner.DEFAULT_COMPACTION_JOB_MAX_NUM_REDUCERS);
  int numReducers = Math.min(Ints.checkedCast(inputSize / targetFileSize) + 1, maxNumReducers);
  // Optionally round the reducer count up to the next prime.
  boolean usePrimeReducers = state.getPropAsBoolean(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_USE_PRIME_REDUCERS,
      MRCompactorAvroKeyDedupJobRunner.DEFAULT_COMPACTION_JOB_USE_PRIME_REDUCERS);
  if (usePrimeReducers && numReducers != 1) {
    numReducers = Primes.nextPrime(numReducers);
  }
  job.setNumReduceTasks(numReducers);
}
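// Worked example (numbers hypothetical): inputSize = 10 GiB, targetFileSize = 512 MiB,
// maxNumReducers = 100 -> numReducers = min(10240 MiB / 512 MiB + 1, 100) = 21;
// with usePrimeReducers enabled, Primes.nextPrime(21) = 23 reduce tasks are requested.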
@SuppressWarnings({ "serial", "unchecked" }) protected static <T> Optional<T> populateField(State state, String key, TypeToken<T> token) { if (state.contains(key)) { Optional<T> fieldValue; if (new TypeToken<Boolean>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsBoolean(key)); } else if (new TypeToken<Integer>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsInt(key)); } else if (new TypeToken<Long>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsLong(key)); } else if (new TypeToken<List<String>>() {}.isAssignableFrom(token)) { fieldValue = (Optional<T>) Optional.of(state.getPropAsList(key)); } else { fieldValue = (Optional<T>) Optional.of(state.getProp(key)); } state.removeProp(key); return fieldValue; } return Optional.<T> absent(); }
/**
 * @return a RetryerBuilder that retries on all exceptions except NonTransientException, with exponential back-off
 */
public static RetryerBuilder<Void> createRetryBuilder(State state) {
  Predicate<Throwable> transients = new Predicate<Throwable>() {
    @Override
    public boolean apply(Throwable t) {
      return !(t instanceof NonTransientException);
    }
  };
  long multiplier = state.getPropAsLong(RETRY_MULTIPLIER, 500L);
  long maxWaitMsPerInterval = state.getPropAsLong(RETRY_MAX_WAIT_MS_PER_INTERVAL, 10000L);
  int maxAttempts = state.getPropAsInt(RETRY_MAX_ATTEMPTS, 5);
  return RetryerBuilder.<Void>newBuilder()
      .retryIfException(transients)
      // With the default 500 ms multiplier: roughly 1, 2, 4, 8, 16 second delays.
      .withWaitStrategy(WaitStrategies.exponentialWait(multiplier, maxWaitMsPerInterval, TimeUnit.MILLISECONDS))
      // Give up after maxAttempts (default 5) attempts.
      .withStopStrategy(StopStrategies.stopAfterAttempt(maxAttempts));
}
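// A minimal usage sketch: build the Retryer and wrap a flaky call; throwing a
// NonTransientException from the callable aborts immediately instead of retrying.
Retryer<Void> retryer = createRetryBuilder(state).build();
retryer.call(new Callable<Void>() {
  @Override
  public Void call() throws Exception {
    flakyOperation(); // hypothetical operation that may fail transiently
    return null;
  }
});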
/**
 * Get an {@link SlaEventSubmitterBuilder} with the dataset urn, partition, record count, previous publish timestamp,
 * and dedupe status set.
 * The caller MUST set the event submitter and event name before submitting.
 */
public static SlaEventSubmitterBuilder getEventSubmitterBuilder(Dataset dataset, Optional<Job> job, FileSystem fs) {
  SlaEventSubmitterBuilder builder = SlaEventSubmitter.builder().datasetUrn(dataset.getUrn())
      .partition(dataset.jobProps().getProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, ""))
      .dedupeStatus(getOutputDedupeStatus(dataset.jobProps()));
  long previousPublishTime = getPreviousPublishTime(dataset, fs);
  long upstreamTime = dataset.jobProps().getPropAsLong(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, -1L);
  long recordCount = getRecordCount(job);
  // A previous publish time only exists when this is a recompaction job.
  if (previousPublishTime != -1L) {
    builder.previousPublishTimestamp(Long.toString(previousPublishTime));
  }
  // The upstream time is the logical time represented by the compaction input directory.
  if (upstreamTime != -1L) {
    builder.upstreamTimestamp(Long.toString(upstreamTime));
  }
  if (recordCount != -1L) {
    builder.recordCount(Long.toString(recordCount));
  }
  return builder;
}
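// A minimal usage sketch per the javadoc's contract (event name hypothetical; the
// eventSubmitter/eventName setters and the submit() call are assumed from the builder pattern):
getEventSubmitterBuilder(dataset, Optional.of(job), fs)
    .eventSubmitter(eventSubmitter)
    .eventName("CompactionCompleted")
    .build()
    .submit();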
    this.fs.getDefaultReplication(this.outputFile));
this.blockSize = properties.getPropAsLong(
    ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_BLOCK_SIZE, this.numBranches, this.branchId),
    this.fs.getDefaultBlockSize(this.outputFile));
DateTime folderTime = result.getTime();
DateTimeZone timeZone = DateTimeZone.forID(
    this.state.getProp(MRCompactor.COMPACTION_TIMEZONE, MRCompactor.DEFAULT_COMPACTION_TIMEZONE));
DateTime compactionStartTime = new DateTime(this.state.getPropAsLong(CompactionSource.COMPACTION_INIT_TIME), timeZone);
PeriodFormatter formatter = new PeriodFormatterBuilder()
    .appendMonths().appendSuffix("m")
    .appendDays().appendSuffix("d")
    .appendHours().appendSuffix("h")
    .toFormatter();
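// A minimal sketch (period value hypothetical): the formatter built above renders
// Period.months(1).plusDays(2).plusHours(3) as "1m2d3h" via formatter.print(period), and
// formatter.parsePeriod("1m2d3h") recovers the Period, which is presumably how a compaction
// look-back window is expressed in configuration.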
Assert.assertEquals(state.getPropAsList("list", "item1,item2").get(0), "item1"); Assert.assertEquals(state.getPropAsList("list", "item1,item2").get(1), "item2"); Assert.assertEquals(state.getPropAsLong("long", Long.MAX_VALUE), Long.MAX_VALUE); Assert.assertEquals(state.getPropAsInt("int", Integer.MAX_VALUE), Integer.MAX_VALUE); Assert.assertEquals(state.getPropAsDouble("double", Double.MAX_VALUE), Double.MAX_VALUE); Assert.assertEquals(state.getPropAsList("list").get(0), "item1"); Assert.assertEquals(state.getPropAsList("list").get(1), "item2"); Assert.assertEquals(state.getPropAsLong("long"), Long.MAX_VALUE); Assert.assertEquals(state.getPropAsInt("int"), Integer.MAX_VALUE); Assert.assertEquals(state.getPropAsDouble("double"), Double.MAX_VALUE); Assert.assertNotEquals(state.getPropAsList("list", "item1,item2").get(0), "item1"); Assert.assertNotEquals(state.getPropAsList("list", "item1,item2").get(1), "item2"); Assert.assertNotEquals(state.getPropAsLong("long", Long.MAX_VALUE), Long.MAX_VALUE); Assert.assertNotEquals(state.getPropAsInt("int", Integer.MAX_VALUE), Integer.MAX_VALUE); Assert.assertNotEquals(state.getPropAsDouble("double", Double.MAX_VALUE), Double.MAX_VALUE); Assert.assertNotEquals(state.getPropAsList("list").get(0), "item1"); Assert.assertNotEquals(state.getPropAsList("list").get(1), "item2"); Assert.assertNotEquals(state.getPropAsLong("long"), Long.MAX_VALUE); Assert.assertNotEquals(state.getPropAsInt("int"), Integer.MAX_VALUE); Assert.assertNotEquals(state.getPropAsDouble("double"), Double.MAX_VALUE); Assert.assertEquals(state.getPropAsList("list").get(0), "item3"); Assert.assertEquals(state.getPropAsList("list").get(1), "item4"); Assert.assertEquals(state.getPropAsLong("long"), Long.MIN_VALUE); Assert.assertEquals(state.getPropAsInt("int"), Integer.MIN_VALUE); Assert.assertEquals(state.getPropAsDouble("double"), Double.MIN_VALUE);