public DruidRecordWriter(
    DataSchema dataSchema,
    RealtimeTuningConfig realtimeTuningConfig,
    DataSegmentPusher dataSegmentPusher,
    int maxPartitionSize,
    final Path segmentsDescriptorsDir,
    final FileSystem fileSystem
)
{
  File basePersistDir = new File(realtimeTuningConfig.getBasePersistDirectory(), UUID.randomUUID().toString());
  this.tuningConfig = Preconditions.checkNotNull(
      realtimeTuningConfig.withBasePersistDirectory(basePersistDir),
      "realtimeTuningConfig is null"
  );
  this.dataSchema = Preconditions.checkNotNull(dataSchema, "data schema is null");

  appenderator = Appenderators.createOffline(
      this.dataSchema,
      tuningConfig,
      new FireDepartmentMetrics(),
      dataSegmentPusher,
      DruidStorageHandlerUtils.JSON_MAPPER,
      DruidStorageHandlerUtils.INDEX_IO,
      DruidStorageHandlerUtils.INDEX_MERGER_V9
  );
  this.maxPartitionSize = maxPartitionSize;
  appenderator.startJob();
  this.segmentsDescriptorDir = Preconditions.checkNotNull(segmentsDescriptorsDir, "segmentsDescriptorsDir is null");
  this.fileSystem = Preconditions.checkNotNull(fileSystem, "file system is null");
  this.segmentGranularity = this.dataSchema.getGranularitySpec().getSegmentGranularity();
  committerSupplier = Suppliers.ofInstance(Committers.nil())::get;
}
public GranularitySpec getGranularitySpec() { return schema.getDataSchema().getGranularitySpec(); }
public Optional<Set<Interval>> getSegmentGranularIntervals()
{
  return Optional.fromNullable(
      (Set<Interval>) schema.getDataSchema()
                            .getGranularitySpec()
                            .bucketIntervals()
                            .orNull()
  );
}
public Optional<List<Interval>> getIntervals()
{
  Optional<SortedSet<Interval>> setOptional = schema.getDataSchema().getGranularitySpec().bucketIntervals();
  if (setOptional.isPresent()) {
    return Optional.of((List<Interval>) JodaUtils.condenseIntervals(setOptional.get()));
  } else {
    return Optional.absent();
  }
}
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  final Optional<SortedSet<Interval>> intervals = ingestionSchema.getDataSchema()
                                                                 .getGranularitySpec()
                                                                 .bucketIntervals();
  if (intervals.isPresent()) {
    return isReady(taskActionClient, intervals.get());
  } else {
    return true;
  }
}
public List<Interval> getInputIntervals()
{
  return schema.getDataSchema()
               .getGranularitySpec()
               .inputIntervals();
}
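For orientation, the spec object behind these accessors is typically a UniformGranularitySpec. A minimal construction sketch, assuming Druid's UniformGranularitySpec, Granularities, and Intervals helpers are available; the day/none granularities and the interval literal are illustrative values, not taken from the snippets above.

// Minimal sketch (illustrative values): one segment per day, no query-time truncation,
// and a single explicit input interval that bucketIntervals()/inputIntervals() will expose.
GranularitySpec granularitySpec = new UniformGranularitySpec(
    Granularities.DAY,                                        // segment granularity
    Granularities.NONE,                                       // query granularity
    ImmutableList.of(Intervals.of("2017-01-01/2017-02-01"))   // input intervals (illustrative)
);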
this.rollupGran = spec.getDataSchema().getGranularitySpec().getQueryGranularity();
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  Optional<SortedSet<Interval>> intervals = spec.getDataSchema().getGranularitySpec().bucketIntervals();
  if (intervals.isPresent()) {
    Interval interval = JodaUtils.umbrellaInterval(
        JodaUtils.condenseIntervals(intervals.get())
    );
    return taskActionClient.submit(new LockTryAcquireAction(TaskLockType.EXCLUSIVE, interval)) != null;
  } else {
    return true;
  }
}
@Override
public SegmentIdentifier allocate(
    final InputRow row,
    final String sequenceName,
    final String previousSegmentId,
    final boolean skipSegmentLineageCheck
) throws IOException
{
  return taskActionClient.submit(
      new SegmentAllocateAction(
          dataSchema.getDataSource(),
          row.getTimestamp(),
          dataSchema.getGranularitySpec().getQueryGranularity(),
          dataSchema.getGranularitySpec().getSegmentGranularity(),
          sequenceName,
          previousSegmentId,
          skipSegmentLineageCheck
      )
  );
}
/**
 * Get the proper bucket for some input row.
 *
 * @param inputRow an InputRow
 *
 * @return the Bucket that this row belongs to
 */
public Optional<Bucket> getBucket(InputRow inputRow)
{
  final Optional<Interval> timeBucket = schema.getDataSchema().getGranularitySpec().bucketInterval(
      DateTimes.utc(inputRow.getTimestampFromEpoch())
  );
  if (!timeBucket.isPresent()) {
    return Optional.absent();
  }

  final DateTime bucketStart = timeBucket.get().getStart();
  final ShardSpec actualSpec = shardSpecLookups.get(bucketStart.getMillis())
                                               .getShardSpec(
                                                   rollupGran.bucketStart(inputRow.getTimestamp()).getMillis(),
                                                   inputRow
                                               );
  final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(bucketStart.getMillis()).get(actualSpec);

  return Optional.of(
      new Bucket(
          hadoopyShardSpec.getShardNum(),
          bucketStart,
          actualSpec.getPartitionNum()
      )
  );
}
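A hedged caller sketch for getBucket: the rows iterable and the emit helper below are hypothetical names used only to show how the Optional result is typically handled, not code from the original class.

// Hypothetical caller: "rows" and "emit" are illustrative, not from the original code.
for (InputRow row : rows) {
  final Optional<Bucket> bucket = getBucket(row);
  if (bucket.isPresent()) {
    emit(bucket.get(), row);   // route the row to the shard/interval it belongs to
  }
  // rows whose timestamps fall outside every bucket interval are simply skipped
}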
public void verify()
{
  try {
    log.info("Running with config:%n%s", JSON_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this));
  }
  catch (IOException e) {
    throw Throwables.propagate(e);
  }

  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
private void mergeAndPush()
{
  final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
  final Period windowPeriod = config.getWindowPeriod();
  // ... (remainder of the method elided)
}
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  boolean rollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9.mergeQueryableIndex(
      indexes,
      rollup,
      aggs,
      file,
      config.getIndexSpec(),
      progressIndicator,
      null
  );
}
protected void startPersistThread()
{
  final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
  final Period windowPeriod = config.getWindowPeriod();
  // ... (remainder of the method elided)
}
private void startPersistThread()
{
  final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
  final Period windowPeriod = config.getWindowPeriod();
  // ... (remainder of the method elided)
}
indexMergerV9.mergeQueryableIndex(
    indexes,
    schema.getGranularitySpec().isRollup(),
    schema.getAggregators(),
    fileToUpload,
    // ... (remaining arguments elided in the original snippet)
// Two separate, truncated call chains; their receivers were elided in the original snippet.
    .getGranularitySpec()
    .bucketIntervals()
    .isPresent();

    .getGranularitySpec()
    .withIntervals(
        JodaUtils.condenseIntervals(
private SegmentIdentifier getSegmentIdentifier(long timestamp)
{
  if (!rejectionPolicy.accept(timestamp)) {
    return null;
  }

  final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
  final VersioningPolicy versioningPolicy = config.getVersioningPolicy();

  DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
  final long truncatedTime = truncatedDateTime.getMillis();

  SegmentIdentifier retVal = segments.get(truncatedTime);
  if (retVal == null) {
    final Interval interval = new Interval(
        truncatedDateTime,
        segmentGranularity.increment(truncatedDateTime)
    );

    retVal = new SegmentIdentifier(
        schema.getDataSource(),
        interval,
        versioningPolicy.getVersion(interval),
        config.getShardSpec()
    );
    addSegment(retVal);
  }

  return retVal;
}
private Sink getSink(long timestamp)
{
  if (!rejectionPolicy.accept(timestamp)) {
    return null;
  }

  final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
  final VersioningPolicy versioningPolicy = config.getVersioningPolicy();

  DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
  final long truncatedTime = truncatedDateTime.getMillis();

  Sink retVal = sinks.get(truncatedTime);
  if (retVal == null) {
    final Interval sinkInterval = new Interval(
        truncatedDateTime,
        segmentGranularity.increment(truncatedDateTime)
    );

    retVal = new Sink(
        sinkInterval,
        schema,
        config.getShardSpec(),
        versioningPolicy.getVersion(sinkInterval),
        config.getMaxRowsInMemory(),
        config.isReportParseExceptions()
    );
    addSink(retVal);
  }

  return retVal;
}
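Both getSegmentIdentifier and getSink share the same truncation step. A standalone sketch of just that step, where Granularities.HOUR and timestampMillis are illustrative stand-ins rather than values from the code above:

// Illustrative only: HOUR granularity and timestampMillis are stand-ins.
final Granularity segmentGranularity = Granularities.HOUR;
final DateTime bucketStart = segmentGranularity.bucketStart(DateTimes.utc(timestampMillis));
final Interval segmentInterval = new Interval(bucketStart, segmentGranularity.increment(bucketStart));
// segmentInterval is the [bucketStart, bucketStart + one granularity) window the segment or sink covers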
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows())
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}