public boolean isUpdaterJobSpecSet()
{
  return (schema.getIOConfig().getMetadataUpdateSpec() != null);
}
@Override
protected List<? extends Module> getModules()
{
  return ImmutableList.of(
      binder -> {
        binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/internal-hadoop-indexer");
        binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
        binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);

        // bind metadata storage config based on HadoopIOConfig
        MetadataStorageUpdaterJobSpec metadataSpec = getHadoopDruidIndexerConfig().getSchema()
                                                                                  .getIOConfig()
                                                                                  .getMetadataUpdateSpec();

        binder.bind(new TypeLiteral<Supplier<MetadataStorageConnectorConfig>>() {}).toInstance(metadataSpec);
        binder.bind(MetadataStorageTablesConfig.class).toInstance(metadataSpec.getMetadataStorageTablesConfig());
        binder.bind(IndexerMetadataStorageCoordinator.class).to(IndexerSQLMetadataStorageCoordinator.class).in(
            LazySingleton.class
        );
      }
  );
}
public void setGranularitySpec(GranularitySpec granularitySpec)
{
  this.schema = schema.withDataSchema(schema.getDataSchema().withGranularitySpec(granularitySpec));
  this.pathSpec = JSON_MAPPER.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);
}
public void setShardSpecs(Map<Long, List<HadoopyShardSpec>> shardSpecs)
{
  this.schema = schema.withTuningConfig(schema.getTuningConfig().withShardSpecs(shardSpecs));
  this.pathSpec = JSON_MAPPER.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);
}
this.pathSpec = JSON_MAPPER.convertValue(spec.getIOConfig().getPathSpec(), PathSpec.class);
for (Map.Entry<Long, List<HadoopyShardSpec>> entry : spec.getTuningConfig().getShardSpecs().entrySet()) {
  if (entry.getValue() == null || entry.getValue().isEmpty()) {
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
Preconditions.checkArgument(
    this.spec.getIOConfig().getSegmentOutputPath() == null,
    "segmentOutputPath must be absent"
);
Preconditions.checkArgument(this.spec.getTuningConfig().getWorkingPath() == null, "workingPath must be absent");
Preconditions.checkArgument(
    this.spec.getIOConfig().getMetadataUpdateSpec() == null,
    "metadataUpdateSpec must be absent"
);
String ingestionSpec = "ingestionSpec";

Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
List<Map<String, Object>> datasourcePathSpecs = new ArrayList<>();
if (pathSpec.get(type).equals(dataSource)) {
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();

    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public boolean run()
{
  final List<DataSegment> segments = IndexGeneratorJob.getPublishedSegments(config);
  final String segmentTable = config.getSchema().getIOConfig().getMetadataUpdateSpec().getSegmentTable();
  handler.publishSegments(segmentTable, segments, HadoopDruidIndexerConfig.JSON_MAPPER);
  return true;
}
}
config.getSchema().getIOConfig().getSegmentOutputPath(),
segmentGranularity.toPath(timeBucket)
);
);
final FileSystem outputFS = new Path(config.getSchema().getIOConfig().getSegmentOutputPath())
    .getFileSystem(context.getConfiguration());

mergedBase,
JobHelper.makeFileNamePath(
    new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
    outputFS,
    segmentTemplate,
),
JobHelper.makeFileNamePath(
    new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
    outputFS,
    segmentTemplate,
),
JobHelper.makeTmpPath(
    new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
    outputFS,
    segmentTemplate,
public String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];
  final String hadoopJobIdFile = args[3];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
      schema,
      HadoopIngestionSpec.class
  );
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  job = new HadoopDruidDetermineConfigurationJob(config);
  job.setHadoopJobIdFile(hadoopJobIdFile);

  log.info("Starting a hadoop determine configuration job...");

  if (job.run()) {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null)
    );
  } else {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage())
    );
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);

  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for "delta-ingestion" use case where we are reading rows stored in Druid as well
    // as late arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }

  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}