@Override
protected List<? extends Module> getModules()
{
  return ImmutableList.of(
      binder -> {
        binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/internal-hadoop-indexer");
        binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
        binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);

        // bind metadata storage config based on HadoopIOConfig
        MetadataStorageUpdaterJobSpec metadataSpec = getHadoopDruidIndexerConfig().getSchema()
                                                                                  .getIOConfig()
                                                                                  .getMetadataUpdateSpec();

        binder.bind(new TypeLiteral<Supplier<MetadataStorageConnectorConfig>>() {})
              .toInstance(metadataSpec);
        binder.bind(MetadataStorageTablesConfig.class).toInstance(metadataSpec.getMetadataStorageTablesConfig());
        binder.bind(IndexerMetadataStorageCoordinator.class)
              .to(IndexerSQLMetadataStorageCoordinator.class)
              .in(LazySingleton.class);
      }
  );
}
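// A minimal, self-contained sketch of the Guice TypeLiteral pattern used above to
// bind the parameterized Supplier type. It works because the anonymous TypeLiteral
// subclass preserves the generic type argument that plain Class tokens erase. The
// names here (ConnectorConfig, the connect URI) are illustrative, not Druid's.
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.TypeLiteral;
import java.util.function.Supplier;

public class TypeLiteralBindingSketch
{
  static class ConnectorConfig
  {
    final String connectURI;

    ConnectorConfig(String connectURI)
    {
      this.connectURI = connectURI;
    }
  }

  public static void main(String[] args)
  {
    Supplier<ConnectorConfig> supplier = () -> new ConnectorConfig("jdbc:example://host");

    Injector injector = Guice.createInjector(new AbstractModule()
    {
      @Override
      protected void configure()
      {
        // Without TypeLiteral, Supplier.class would lose the <ConnectorConfig> argument.
        bind(new TypeLiteral<Supplier<ConnectorConfig>>() {}).toInstance(supplier);
      }
    });

    ConnectorConfig config =
        injector.getInstance(Key.get(new TypeLiteral<Supplier<ConnectorConfig>>() {})).get();
    System.out.println(config.connectURI);
  }
}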
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
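// The property override above works because the injector returns one shared,
// mutable Properties instance: a setProperty call made before the metadata-storage
// components are instantiated is visible to every later reader of that binding.
// A tiny sketch of that behavior, with the binding set up inline for illustration:
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import java.util.Properties;

public class SharedPropertiesSketch
{
  public static void main(String[] args)
  {
    Injector injector = Guice.createInjector(new AbstractModule()
    {
      @Override
      protected void configure()
      {
        bind(Properties.class).toInstance(new Properties());
      }
    });

    // Mutate the bound instance, as run() does for druid.metadata.storage.type.
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", "mysql");

    // A second lookup returns the same object, hence the same override.
    System.out.println(injector.getInstance(Properties.class).getProperty("druid.metadata.storage.type"));
  }
}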
public static boolean runSingleJob(Jobby job, HadoopDruidIndexerConfig config)
{
  boolean succeeded = job.run();

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        Configuration conf = injectSystemProperties(new Configuration());
        config.addJobProperties(conf);
        workingPath.getFileSystem(conf).delete(workingPath, true);
      }
      catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  return succeeded;
}
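// The cleanup above resolves the FileSystem from the path itself, so the same code
// handles hdfs://, file://, and other schemes. A small, self-contained sketch of
// that Hadoop API; the /tmp path is hypothetical (the real one comes from
// config.makeIntermediatePath()).
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CleanupSketch
{
  public static void main(String[] args) throws IOException
  {
    Path workingPath = new Path("/tmp/druid-indexer/working");
    Configuration conf = new Configuration();

    // getFileSystem picks the implementation for the path's scheme; the second
    // argument to delete enables recursive removal of the whole directory tree.
    FileSystem fs = workingPath.getFileSystem(conf);
    if (fs.exists(workingPath)) {
      fs.delete(workingPath, true);
    }
  }
}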
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config)
{
  boolean succeeded = true;
  for (Jobby job : jobs) {
    if (!job.run()) {
      succeeded = false;
      break;
    }
  }

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        Configuration conf = injectSystemProperties(new Configuration());
        config.addJobProperties(conf);
        workingPath.getFileSystem(conf).delete(workingPath, true);
      }
      catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  return succeeded;
}
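// The loop above is deliberately fail-fast: the first job that returns false stops
// the chain, and only then does the cleanup logic decide whether to delete the
// working path. A stripped-down sketch of that control flow, using a hypothetical
// SimpleJob stand-in for Druid's Jobby:
import java.util.Arrays;
import java.util.List;

public class FailFastRunnerSketch
{
  interface SimpleJob
  {
    boolean run();
  }

  // Mirrors runJobs: stop at the first failure, report overall success.
  static boolean runAll(List<SimpleJob> jobs)
  {
    for (SimpleJob job : jobs) {
      if (!job.run()) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args)
  {
    List<SimpleJob> jobs = Arrays.asList(
        () -> { System.out.println("determine configuration"); return true; },
        () -> { System.out.println("index generation"); return false; },
        () -> { System.out.println("never reached"); return true; }
    );
    System.out.println("pipeline succeeded: " + runAll(jobs));
  }
}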
@Override
public boolean run()
{
  final List<DataSegment> segments = IndexGeneratorJob.getPublishedSegments(config);
  final String segmentTable = config.getSchema().getIOConfig().getMetadataUpdateSpec().getSegmentTable();
  handler.publishSegments(segmentTable, segments, HadoopDruidIndexerConfig.JSON_MAPPER);
  return true;
}
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  boolean rollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9
      .mergeQueryableIndex(indexes, rollup, aggs, file, config.getIndexSpec(), progressIndicator, null);
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
// ...
    config.getSchema().getIOConfig().getSegmentOutputPath(),
    segmentGranularity.toPath(timeBucket)
);
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for the "delta-ingestion" use case, where we read rows
    // already stored in Druid as well as late-arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
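// Toy illustration (plain Java, not Druid's API) of why rows re-read from existing
// segments need combining aggregator factories: raw late-arriving events carry the
// original input field, while segment rows carry pre-aggregated partials of the
// metric, and the two must be folded differently before they can be merged.
public class CombiningAggregatorSketch
{
  public static void main(String[] args)
  {
    // Raw events from HDFS: the original aggregator sums the source field.
    long[] rawClicks = {1, 2, 3};
    long rawSum = 0;
    for (long clicks : rawClicks) {
      rawSum += clicks;
    }

    // Rows read back from Druid already hold partial "clicks" sums; the combining
    // factory folds those partials rather than re-reading raw input fields.
    long[] partialSums = {10, 20};
    long total = rawSum;
    for (long partial : partialSums) {
      total += partial;
    }
    System.out.println(total); // 36
  }
}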
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
  metricsFields.addAll(agg.requiredFields());
}
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}
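// Toy illustration (plain Java, not Druid's API) of what the rollup flag above
// enables: rows that share the same query-granularity time bucket and dimension
// values are folded into a single aggregated row inside the incremental index.
import java.util.LinkedHashMap;
import java.util.Map;

public class RollupSketch
{
  public static void main(String[] args)
  {
    long hourMillis = 3_600_000L;
    // {timestampMillis, clicks} pairs for a single "page=home" dimension value.
    long[][] rows = {{10L, 3L}, {20L, 4L}, {hourMillis + 5L, 1L}};

    Map<Long, Long> rolledUp = new LinkedHashMap<>();
    for (long[] row : rows) {
      long bucket = (row[0] / hourMillis) * hourMillis; // query-granularity truncation
      rolledUp.merge(bucket, row[1], Long::sum);        // fold rows sharing a bucket
    }
    // The two rows in hour 0 collapse to one: {0=7, 3600000=1}
    System.out.println(rolledUp);
  }
}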
int numBackgroundPersistThreads = config.getSchema().getTuningConfig().getNumBackgroundPersistThreads();
if (numBackgroundPersistThreads > 0) {
  final BlockingQueue<Runnable> queue = new SynchronousQueue<>();
  // ...
}

final FileSystem outputFS = new Path(config.getSchema().getIOConfig().getSegmentOutputPath())
    .getFileSystem(context.getConfiguration());

// ...
    config.getDataSource(),
    interval,
    config.getSchema().getTuningConfig().getVersion(),
    null,
    ImmutableList.copyOf(allDimensionNames),
    // ...
    mergedBase,
    JobHelper.makeFileNamePath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS,
        segmentTemplate
        // ...
    ),
    JobHelper.makeFileNamePath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS,
        segmentTemplate
        // ...
    ),
    JobHelper.makeTmpPath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS
        // ...
if (config.getSchema().getTuningConfig().getUseCombiner()) {
  job.setCombinerClass(IndexGeneratorCombiner.class);
  job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
}
public String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];
  final String hadoopJobIdFile = args[3];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
      schema,
      HadoopIngestionSpec.class
  );
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  job = new HadoopDruidDetermineConfigurationJob(config);
  job.setHadoopJobIdFile(hadoopJobIdFile);

  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null)
    );
  } else {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage())
    );
  }
}
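// The spec arrives as a JSON string in args[0] and the job status leaves as JSON,
// so the whole exchange is a Jackson round-trip. A minimal sketch of that pattern
// with a hypothetical Spec type (the real types are HadoopIngestionSpec and
// HadoopDetermineConfigInnerProcessingStatus):
import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonRoundTripSketch
{
  public static class Spec
  {
    public String dataSource;
    public String workingPath;
  }

  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();

    // Deserialize the spec handed over on the command line...
    Spec spec = mapper.readValue(
        "{\"dataSource\":\"wikipedia\",\"workingPath\":\"/tmp/work\"}",
        Spec.class
    );
    // ...rewrite a path, as runTask does with withWorkingPath(...)...
    spec.workingPath = "/tmp/overridden";

    // ...and serialize the result back to a JSON string.
    System.out.println(mapper.writeValueAsString(spec));
  }
}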