HadoopTuningConfig tuningConfigForTask = new HadoopTuningConfig(
    tuningConfig.getWorkingPath(),
    version,
    tuningConfig.getPartitionsSpec(),
    tuningConfig.getShardSpecs(),
    tuningConfig.getIndexSpec(),
    tuningConfig.getRowFlushBoundary(),
    tuningConfig.getMaxBytesInMemory(),
    tuningConfig.isLeaveIntermediate(),
    tuningConfig.isCleanupOnFailure(),
    tuningConfig.isOverwriteFiles(),
    tuningConfig.isIgnoreInvalidRows(),
    tuningConfig.getJobProperties(),
    tuningConfig.isCombineText(),
    tuningConfig.getUseCombiner(),
    tuningConfig.getRowFlushBoundary(), // legacy row-flush parameter; withVersion below passes null in this position
    tuningConfig.getBuildV9Directly(),
    tuningConfig.getNumBackgroundPersistThreads(),
    tuningConfig.isForceExtendableShardSpecs(),
    true, // useExplicitVersion: the task supplies its own version (cf. the field order in withVersion below)
    tuningConfig.getUserAllowedHadoopPrefix(),
    tuningConfig.isLogParseExceptions(),
    tuningConfig.getMaxParseExceptions()
);
for (Map.Entry<Long, List<HadoopyShardSpec>> entry : spec.getTuningConfig().getShardSpecs().entrySet()) {
  if (entry.getValue() == null || entry.getValue().isEmpty()) {
    continue;
  }
  // ...
}
// ...
this.allowedHadoopPrefix.add("druid.javascript");
this.allowedHadoopPrefix.addAll(DATA_SEGMENT_PUSHER.getAllowedPropertyPrefixesForHadoop());
this.allowedHadoopPrefix.addAll(spec.getTuningConfig().getUserAllowedHadoopPrefix());
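// A minimal sketch (an assumption, not the excerpt's actual code) of how such an allow-list
// is typically applied: only properties whose keys match one of the allowed prefixes are
// forwarded into the Hadoop job, so arbitrary process properties do not leak into the
// MapReduce configuration. `forwardAllowedProperties` is a hypothetical helper name.
static void forwardAllowedProperties(Configuration conf, List<String> allowedHadoopPrefix)
{
  for (String propName : System.getProperties().stringPropertyNames()) {
    for (String prefix : allowedHadoopPrefix) {
      if (propName.equals(prefix) || propName.startsWith(prefix + ".")) {
        conf.set(propName, System.getProperty(propName));
        break;
      }
    }
  }
}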
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}
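// A minimal sketch (an assumption, not part of the excerpt above) of the spill/rollover
// pattern this helper supports: once the in-memory index reaches the row-flush boundary,
// it is persisted and replaced, and the retiring index's dimension order and column
// capabilities are passed back in so every spill for the bucket shares the same layout.
// `persistToDisk` and `previousCapabilities` are hypothetical placeholders.
if (index.size() >= tuningConfig.getRowFlushBoundary()) {
  Iterable<String> previousDimOrder = index.getDimensionOrder();
  persistToDisk(index);
  index = makeIncrementalIndex(bucket, aggs, config, previousDimOrder, previousCapabilities);
}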
public String getWorkingPath()
{
  final String workingPath = schema.getTuningConfig().getWorkingPath();
  return workingPath == null ? DEFAULT_WORKING_PATH : workingPath;
}
public PartitionsSpec getPartitionsSpec()
{
  return schema.getTuningConfig().getPartitionsSpec();
}
int numBackgroundPersistThreads = config.getSchema().getTuningConfig().getNumBackgroundPersistThreads();
if (numBackgroundPersistThreads > 0) {
  final BlockingQueue<Runnable> queue = new SynchronousQueue<>();
  // ...
}
// ...
config.getDataSource(),
interval,
config.getSchema().getTuningConfig().getVersion(),
null,
ImmutableList.copyOf(allDimensionNames),
// ...
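// Sketch (an assumption, not the excerpt's actual code) of how a SynchronousQueue-backed
// pool can cap concurrent background persists: the queue holds no tasks, so a submit either
// hands the persist to an idle thread or, via CallerRunsPolicy, runs it on the submitting
// thread, which applies back-pressure once all persist threads are busy. Reuses the
// `numBackgroundPersistThreads` and `queue` variables from the excerpt above.
ExecutorService persistExecutor = new ThreadPoolExecutor(
    numBackgroundPersistThreads,
    numBackgroundPersistThreads,
    0L,
    TimeUnit.MILLISECONDS,
    queue,
    new ThreadPoolExecutor.CallerRunsPolicy()
);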
public void addJobProperties(Configuration conf)
{
  for (final Map.Entry<String, String> entry : schema.getTuningConfig().getJobProperties().entrySet()) {
    conf.set(entry.getKey(), entry.getValue());
  }
}
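// Assumed usage sketch: the tuning config's jobProperties are copied onto the Hadoop job's
// Configuration before submission, so user-supplied settings (e.g. memory or compression
// options) reach the MapReduce job. The job name is illustrative.
Job job = Job.getInstance(new Configuration(), "druid-index-generator");
config.addJobProperties(job.getConfiguration());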
public int getShardSpecCount(Bucket bucket)
{
  return schema.getTuningConfig().getShardSpecs().get(bucket.time.getMillis()).size();
}
public HadoopTuningConfig withVersion(String ver)
{
  return new HadoopTuningConfig(
      workingPath,
      ver,
      partitionsSpec,
      shardSpecs,
      indexSpec,
      rowFlushBoundary,
      maxBytesInMemory,
      leaveIntermediate,
      cleanupOnFailure,
      overwriteFiles,
      ignoreInvalidRows,
      jobProperties,
      combineText,
      useCombiner,
      null,
      true,
      numBackgroundPersistThreads,
      forceExtendableShardSpecs,
      useExplicitVersion,
      allowedHadoopPrefix,
      logParseExceptions,
      maxParseExceptions
  );
}
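// Assumed usage sketch: when the indexing task acquires its lock, the lock's version replaces
// whatever version the spec carried, using the copy-style setter above. `lockVersion` is a
// hypothetical variable standing in for the acquired lock's version string.
HadoopTuningConfig versionedTuningConfig = tuningConfig.withVersion(lockVersion);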
public int getMaxParseExceptions()
{
  return schema.getTuningConfig().getMaxParseExceptions();
}
public IndexSpec getIndexSpec()
{
  return schema.getTuningConfig().getIndexSpec();
}
"segmentOutputPath must be absent" ); Preconditions.checkArgument(this.spec.getTuningConfig().getWorkingPath() == null, "workingPath must be absent"); Preconditions.checkArgument( this.spec.getIOConfig().getMetadataUpdateSpec() == null,
public boolean isDeterminingPartitions()
{
  return schema.getTuningConfig().getPartitionsSpec().isDeterminingPartitions();
}
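// Sketch of the assumed control flow (the job class names come from the indexing-hadoop
// module but their use here is an assumption): a partition-determination job is scheduled
// ahead of the index-generator job only when the partitions spec does not already fix the shards.
List<Jobby> jobs = new ArrayList<>();
if (config.isDeterminingPartitions()) {
  jobs.add(new DetermineHashedPartitionsJob(config)); // or DeterminePartitionsJob, depending on the partitions spec
}
jobs.add(new IndexGeneratorJob(config));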
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(
    task.getSpec().getTuningConfig().getJobProperties(),
    task2.getSpec().getTuningConfig().getJobProperties()
);
Assert.assertEquals("blah", task.getClasspathPrefix());
public HadoopyShardSpec getShardSpec(Bucket bucket)
{
  return schema.getTuningConfig().getShardSpecs().get(bucket.time.getMillis()).get(bucket.partitionNum);
}
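// Assumed usage sketch: a row's truncated timestamp identifies the bucket, the bucket's
// interval start keys into the shard-spec map, and the bucket's partition number picks the
// HadoopyShardSpec for that partition; getShardSpecCount gives the number of partitions
// configured for the same interval.
HadoopyShardSpec shardSpec = config.getShardSpec(bucket);
int partitionsInInterval = config.getShardSpecCount(bucket);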