/**
 * Record writer that feeds rows into Druid segments through an offline Appenderator.
 *
 * @param dataSchema             Druid schema describing the target datasource (non-null)
 * @param realtimeTuningConfig   tuning config; its base persist directory is replaced
 *                               with a unique per-writer sub-directory
 * @param dataSegmentPusher      pusher handed to the appenderator for publishing segments
 * @param maxPartitionSize       stored as-is; presumably an upper bound on rows per
 *                               segment partition — enforced elsewhere, not here
 * @param segmentsDescriptorsDir directory for segment descriptor output (non-null)
 * @param fileSystem             Hadoop filesystem used with the descriptor dir (non-null)
 */
public DruidRecordWriter(
    DataSchema dataSchema,
    RealtimeTuningConfig realtimeTuningConfig,
    DataSegmentPusher dataSegmentPusher,
    int maxPartitionSize,
    final Path segmentsDescriptorsDir,
    final FileSystem fileSystem
)
{
  // Unique per-writer persist sub-directory, so writers sharing the same base
  // persist directory never collide on disk.
  File basePersistDir = new File(realtimeTuningConfig.getBasePersistDirectory(), UUID.randomUUID().toString());
  this.tuningConfig = Preconditions.checkNotNull(
      realtimeTuningConfig.withBasePersistDirectory(basePersistDir),
      "realtimeTuningConfig is null"
  );
  this.dataSchema = Preconditions.checkNotNull(dataSchema, "data schema is null");
  appenderator = Appenderators.createOffline(
      this.dataSchema,
      tuningConfig,
      new FireDepartmentMetrics(),
      dataSegmentPusher,
      DruidStorageHandlerUtils.JSON_MAPPER,
      DruidStorageHandlerUtils.INDEX_IO,
      DruidStorageHandlerUtils.INDEX_MERGER_V9
  );
  this.maxPartitionSize = maxPartitionSize;
  // Start the appenderator before any rows are added; startJob() is called
  // exactly once, here, for this writer's lifetime.
  appenderator.startJob();
  this.segmentsDescriptorDir = Preconditions.checkNotNull(segmentsDescriptorsDir, "segmentsDescriptorsDir is null");
  this.fileSystem = Preconditions.checkNotNull(fileSystem, "file system is null");
  this.segmentGranularity = this.dataSchema.getGranularitySpec().getSegmentGranularity();
  // No-op committer: this offline write path has no upstream offsets to commit.
  committerSupplier = Suppliers.ofInstance(Committers.nil())::get;
}
final Granularity segmentGranularity = config.getGranularitySpec().getSegmentGranularity(); new UniformGranularitySpec( segmentGranularity, config.getGranularitySpec().getQueryGranularity(), config.getGranularitySpec().isRollup(), Lists.newArrayList(bucketsToRun)
int thrownAway = 0; int unparseable = 0; final Granularity queryGranularity = granularitySpec.getQueryGranularity(); interval = granularitySpec.getSegmentGranularity().bucket(inputRow.getTimestamp()); } else { final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp()); if (!optInterval.isPresent()) { thrownAway++;
/**
 * Allocates a segment identifier for the given row by submitting a
 * {@link SegmentAllocateAction} through the task action client.
 *
 * @param row                     row whose timestamp drives the allocation
 * @param sequenceName            sequence the allocation belongs to
 * @param previousSegmentId       id of the previously allocated segment, or null
 * @param skipSegmentLineageCheck whether to skip the lineage check on allocation
 *
 * @throws IOException if the action submission fails
 */
@Override
public SegmentIdentifier allocate(
    final InputRow row,
    final String sequenceName,
    final String previousSegmentId,
    final boolean skipSegmentLineageCheck
) throws IOException
{
  return taskActionClient.submit(
      new SegmentAllocateAction(
          dataSchema.getDataSource(),
          row.getTimestamp(),
          // Argument order matters: SegmentAllocateAction takes the query
          // granularity before the (preferred) segment granularity.
          dataSchema.getGranularitySpec().getQueryGranularity(),
          dataSchema.getGranularitySpec().getSegmentGranularity(),
          sequenceName,
          previousSegmentId,
          skipSegmentLineageCheck
      )
  );
}
} // closes the enclosing anonymous class (its declaration is outside this view)
if (determineIntervals) { interval = config.getGranularitySpec() .getSegmentGranularity() .bucket(DateTimes.utc(inputRow.getTimestampFromEpoch())); .bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch()));
.withMinTimestamp(minTimestamp) .withTimestampSpec(schema.getParser()) .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity()) .withDimensionsSpec(schema.getParser()) .withMetrics(schema.getAggregators()) .withRollup(schema.getGranularitySpec().isRollup()) .build(); final IncrementalIndex newIndex = new IncrementalIndex.Builder()
} else if (this.granularitySpec.isRollup()) { log.warn("No metricsSpec has been specified. Are you sure this is what you want?");
/**
 * Initializes per-mapper state: the rollup granularity, the parsed indexer
 * config, and one HyperLogLog collector per known segment interval (or an
 * empty mutable map when intervals must be determined from the data).
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  rollupGranularity = getConfig().getGranularitySpec().getQueryGranularity();
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());

  final Optional<Set<Interval>> intervals = config.getSegmentGranularIntervals();
  determineIntervals = !intervals.isPresent();
  if (determineIntervals) {
    // Intervals are unknown up front; collectors get created lazily per bucket.
    hyperLogLogs = Maps.newHashMap();
  } else {
    // Pre-create one fresh HLL collector for each configured bucket interval.
    final ImmutableMap.Builder<Interval, HyperLogLogCollector> collectors = ImmutableMap.builder();
    for (final Interval bucket : intervals.get()) {
      collectors.put(bucket, HyperLogLogCollector.makeLatestCollector());
    }
    hyperLogLogs = collectors.build();
  }
}
/**
 * Helper for dimension-selection mapping; indexes each configured bucket
 * interval by its start-millis so rows can be routed to an interval ordinal.
 *
 * @param config             the indexer config supplying the granularity spec
 * @param partitionDimension dimension to partition on (may be null; stored as-is)
 */
public DeterminePartitionsDimSelectionMapperHelper(HadoopDruidIndexerConfig config, String partitionDimension)
{
  this.config = config;
  this.partitionDimension = partitionDimension;

  // Map each interval's start instant (millis) to its ordinal position.
  // NOTE(review): bucketIntervals().get() assumes intervals are present here —
  // callers presumably guarantee that; verify upstream.
  final ImmutableMap.Builder<Long, Integer> startMillisToIndex = ImmutableMap.builder();
  int ordinal = 0;
  for (final Interval bucket : config.getGranularitySpec().bucketIntervals().get()) {
    startMillisToIndex.put(bucket.getStartMillis(), ordinal++);
  }
  this.intervalIndexes = startMillisToIndex.build();
}
continue; if (!granularitySpec.bucketIntervals().isPresent() || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())) .isPresent()) { innerMap(inputRow, context, reportParseExceptions);
) throws IOException, InterruptedException final Optional<Interval> maybeInterval = config.getGranularitySpec().bucketInterval(timestamp);
.bucketIntervals() .isPresent(); ingestionSchema.getDataSchema() .getGranularitySpec() .withIntervals( JodaUtils.condenseIntervals( shardSpecs.getIntervals()
/**
 * Returns the input intervals declared by the ingestion spec's granularity
 * spec. Thin delegate; performs no filtering or copying of its own.
 *
 * @return the granularity spec's input intervals
 */
public List<Interval> getInputIntervals()
{
  return schema.getDataSchema()
               .getGranularitySpec()
               .inputIntervals();
}
config.setGranularitySpec( new UniformGranularitySpec( config.getGranularitySpec().getSegmentGranularity(), config.getGranularitySpec().getQueryGranularity(), config.getGranularitySpec().isRollup(), intervals
interval = config.getGranularitySpec().getSegmentGranularity().bucket(DateTimes.utc(key.get())); } else { Optional<Interval> intervalOptional = config.getGranularitySpec().bucketInterval(DateTimes.utc(key.get()));
/**
 * Builds a fresh on-heap incremental index for one time bucket.
 *
 * @param theBucket       bucket whose start time becomes the index's minimum timestamp
 * @param aggs            metric aggregators for the index
 * @param config          indexer config supplying schema and tuning settings
 * @param oldDimOrder     dimension order carried over from a prior index, or null
 * @param oldCapabilities column capabilities matching {@code oldDimOrder}
 *
 * @return a new IncrementalIndex, pre-loaded with the old dimension order when
 *         one was given and the schema has no custom dimensions
 */
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuning = config.getSchema().getTuningConfig();
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  final IncrementalIndex index = new IncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setReportParseExceptions(!tuning.isIgnoreInvalidRows())
      .setMaxRowCount(tuning.getRowFlushBoundary())
      .buildOnheap();

  // Only carry over the previous dimension ordering when one exists and the
  // schema does not pin its own explicit dimension list.
  if (oldDimOrder == null || schema.getDimensionsSpec().hasCustomDimensions()) {
    return index;
  }
  index.loadDimensionIterable(oldDimOrder, oldCapabilities);
  return index;
}
/**
 * Merges the given queryable indexes into a single segment directory.
 *
 * @param indexes           indexes to merge
 * @param aggs              metric aggregators used for the merge
 * @param file              destination directory for the merged segment
 * @param progressIndicator progress callback for the merge
 *
 * @return the directory the merged index was written to
 *
 * @throws IOException on merge failure
 */
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  // The ingestion spec's rollup flag decides whether rows that share a
  // timestamp and dimension values get combined during the merge.
  final boolean useRollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9.mergeQueryableIndex(
      indexes,
      useRollup,
      aggs,
      file,
      config.getIndexSpec(),
      progressIndicator,
      null // no override for the last parameter — same as original call
  );
}
/**
 * Caches the query granularity once per mapper, after the parent's setup has
 * initialized the parsed config behind {@code getConfig()}.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  final Granularity queryGranularity = getConfig().getGranularitySpec().getQueryGranularity();
  rollupGranularity = queryGranularity;
}
/**
 * Returns the segment-granularity bucket intervals from the data schema's
 * granularity spec, or absent when none are defined.
 *
 * @return the bucket intervals, widened to {@code Set<Interval>}, if present
 */
public Optional<Set<Interval>> getSegmentGranularIntervals()
{
  // bucketIntervals() is declared with a narrower Set type (hence the cast —
  // presumably SortedSet; verify against GranularitySpec). Unwrapping with
  // orNull() and re-wrapping lets us change the Optional's type parameter.
  final Set<Interval> buckets = (Set<Interval>) schema.getDataSchema()
                                                      .getGranularitySpec()
                                                      .bucketIntervals()
                                                      .orNull();
  return Optional.fromNullable(buckets);
}
/**
 * Resolves the bucket (time chunk plus shard) that an input row belongs to.
 *
 * @param inputRow an InputRow
 *
 * @return the row's Bucket, or absent when its timestamp falls outside every
 *         configured segment interval
 */
public Optional<Bucket> getBucket(InputRow inputRow)
{
  final Optional<Interval> timeBucket = schema.getDataSchema().getGranularitySpec().bucketInterval(
      DateTimes.utc(inputRow.getTimestampFromEpoch())
  );
  if (!timeBucket.isPresent()) {
    // Outside all segment intervals — the caller treats this row as thrown away.
    return Optional.absent();
  }

  final DateTime bucketStart = timeBucket.get().getStart();
  final long bucketStartMillis = bucketStart.getMillis();

  // Pick the shard within the time chunk, keyed by the rollup-truncated
  // row timestamp (rollupGran.bucketStart) plus the row itself.
  final ShardSpec shard = shardSpecLookups.get(bucketStartMillis)
                                          .getShardSpec(
                                              rollupGran.bucketStart(inputRow.getTimestamp()).getMillis(),
                                              inputRow
                                          );
  final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(bucketStartMillis).get(shard);

  return Optional.of(
      new Bucket(
          hadoopyShardSpec.getShardNum(),
          bucketStart,
          shard.getPartitionNum()
      )
  );
}