private static String getTheDataSource(HadoopIngestionSpec spec)
{
  return spec.getDataSchema().getDataSource();
}

public GranularitySpec getGranularitySpec()
{
  return schema.getDataSchema().getGranularitySpec();
}

public InputRowParser getParser()
{
  return schema.getDataSchema().getParser();
}

public String getDataSource()
{
  return schema.getDataSchema().getDataSource();
}
public Optional<Set<Interval>> getSegmentGranularIntervals()
{
  return Optional.fromNullable(
      (Set<Interval>) schema.getDataSchema()
                            .getGranularitySpec()
                            .bucketIntervals()
                            .orNull()
  );
}
public Optional<List<Interval>> getIntervals()
{
  Optional<SortedSet<Interval>> setOptional = schema.getDataSchema().getGranularitySpec().bucketIntervals();
  if (setOptional.isPresent()) {
    return Optional.of((List<Interval>) JodaUtils.condenseIntervals(setOptional.get()));
  } else {
    return Optional.absent();
  }
}
public List<Interval> getInputIntervals()
{
  return schema.getDataSchema()
               .getGranularitySpec()
               .inputIntervals();
}
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
public void setGranularitySpec(GranularitySpec granularitySpec)
{
  this.schema = schema.withDataSchema(schema.getDataSchema().withGranularitySpec(granularitySpec));
  this.pathSpec = JSON_MAPPER.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);
}
this.rollupGran = spec.getDataSchema().getGranularitySpec().getQueryGranularity();
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  Optional<SortedSet<Interval>> intervals = spec.getDataSchema().getGranularitySpec().bucketIntervals();
  if (intervals.isPresent()) {
    Interval interval = JodaUtils.umbrellaInterval(
        JodaUtils.condenseIntervals(
            intervals.get()
        )
    );
    return taskActionClient.submit(new LockTryAcquireAction(TaskLockType.EXCLUSIVE, interval)) != null;
  } else {
    return true;
  }
}
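// Hedged sketch, not part of the source: illustrates how the bucket intervals above are
// condensed and then covered by a single umbrella interval for the exclusive lock. The
// concrete dates are hypothetical; JodaUtils.condenseIntervals merges overlapping or
// abutting intervals, and JodaUtils.umbrellaInterval returns the minimal covering interval.
List<Interval> buckets = ImmutableList.of(
    Intervals.of("2020-01-01/2020-01-02"),
    Intervals.of("2020-01-02/2020-01-03"), // abuts the first bucket
    Intervals.of("2020-01-05/2020-01-06")
);
List<Interval> condensed = JodaUtils.condenseIntervals(buckets);
// condensed    -> [2020-01-01/2020-01-03, 2020-01-05/2020-01-06]
Interval lockInterval = JodaUtils.umbrellaInterval(condensed);
// lockInterval -> 2020-01-01/2020-01-06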
/**
 * Get the proper bucket for some input row.
 *
 * @param inputRow an InputRow
 *
 * @return the Bucket that this row belongs to
 */
public Optional<Bucket> getBucket(InputRow inputRow)
{
  final Optional<Interval> timeBucket = schema.getDataSchema().getGranularitySpec().bucketInterval(
      DateTimes.utc(inputRow.getTimestampFromEpoch())
  );
  if (!timeBucket.isPresent()) {
    return Optional.absent();
  }
  final DateTime bucketStart = timeBucket.get().getStart();
  final ShardSpec actualSpec = shardSpecLookups.get(bucketStart.getMillis())
                                               .getShardSpec(
                                                   rollupGran.bucketStart(inputRow.getTimestamp()).getMillis(),
                                                   inputRow
                                               );
  final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(bucketStart.getMillis()).get(actualSpec);

  return Optional.of(
      new Bucket(
          hadoopyShardSpec.getShardNum(),
          bucketStart,
          actualSpec.getPartitionNum()
      )
  );
}
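// Hedged illustration, not from the source: shows the two granularities involved in the
// bucket lookup above, assuming DAY segment granularity and HOUR query (rollup) granularity.
// The timestamp is hypothetical.
DateTime rowTime = DateTimes.of("2020-01-01T03:25:00Z");
Interval segmentBucket = Granularities.DAY.bucket(rowTime);          // 2020-01-01/2020-01-02, its start keys the shard lookup maps
DateTime shardLookupTime = Granularities.HOUR.bucketStart(rowTime);  // 2020-01-01T03:00:00Z, passed to getShardSpec()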
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  boolean rollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9
      .mergeQueryableIndex(indexes, rollup, aggs, file, config.getIndexSpec(), progressIndicator, null);
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(config.getSchema()
                                                       .getDataSchema()
                                                       .getParser()
                                                       .getParseSpec()
                                                       .getDimensionsSpec());
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(config.getSchema()
                                                       .getDataSchema()
                                                       .getParser()
                                                       .getParseSpec()
                                                       .getDimensionsSpec());
}
/**
 * Make the intermediate path for this job run.
 *
 * @return the intermediate path for this job run.
 */
public Path makeIntermediatePath()
{
  return new Path(
      StringUtils.format(
          "%s/%s/%s_%s",
          getWorkingPath(),
          schema.getDataSchema().getDataSource(),
          StringUtils.removeChar(schema.getTuningConfig().getVersion(), ':'),
          schema.getUniqueId()
      )
  );
}
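// Hedged sketch with hypothetical values, not from the source: shows the shape of the path
// produced by the "%s/%s/%s_%s" format above, with ':' stripped from the version string.
String workingPath = "/tmp/druid-indexing";      // getWorkingPath()
String dataSource = "wikipedia";                 // dataSchema.getDataSource()
String version = "2020-01-01T00:00:00.000Z";     // tuningConfig.getVersion()
String uniqueId = "abc123";                      // schema.getUniqueId(), made-up value
String intermediatePath = String.format(
    "%s/%s/%s_%s",
    workingPath,
    dataSource,
    version.replace(":", ""),                    // same effect as StringUtils.removeChar(version, ':')
    uniqueId
);
// -> /tmp/druid-indexing/wikipedia/2020-01-01T000000.000Z_abc123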
for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
  metricsFields.addAll(agg.requiredFields());
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for "delta-ingestion" use case where we are reading rows stored in Druid as well
    // as late arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(config.getSchema()
                                                       .getDataSchema()
                                                       .getParser()
                                                       .getParseSpec()
                                                       .getDimensionsSpec());
}
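// Hedged illustration with a hypothetical metric, not from the source: why the combining
// factory is used for rows re-read from existing Druid segments in the delta-ingestion case.
// A "count" column stored in a segment is already aggregated, so re-ingesting it must sum
// the stored values rather than count rows again; getCombiningFactory() returns an
// aggregator that does that re-aggregation.
AggregatorFactory ingestTime = new CountAggregatorFactory("count");
AggregatorFactory reaggregating = ingestTime.getCombiningFactory();
// reaggregating behaves like a long-sum over the stored "count" column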
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}