public HadoopIngestionSpec withDataSchema(DataSchema schema)
{
  return new HadoopIngestionSpec(
      schema,
      ioConfig,
      tuningConfig,
      uniqueId
  );
}
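For context, this follows the immutable copy-on-write pattern used throughout the spec classes: a minimal usage sketch, assuming hypothetical `existingSpec` and `newSchema` values obtained elsewhere.

// Hypothetical variables: existingSpec and newSchema come from the caller.
HadoopIngestionSpec updatedSpec = existingSpec.withDataSchema(newSchema);
// existingSpec is unchanged; updatedSpec reuses its ioConfig, tuningConfig, and uniqueId.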
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
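Each `Preconditions.checkNotNull` throws a `NullPointerException` carrying the given label as its message, so a misconfigured spec fails fast with the name of the missing field. A hedged sketch of the failure path:

// Sketch only: a config whose spec never set a dataSource.
try {
  config.verify();
} catch (NullPointerException e) {
  // e.getMessage() is the label passed to checkNotNull, e.g. "dataSource"
}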
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));
    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
public void setGranularitySpec(GranularitySpec granularitySpec)
{
  this.schema = schema.withDataSchema(schema.getDataSchema().withGranularitySpec(granularitySpec));
  this.pathSpec = JSON_MAPPER.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);
}
public void setShardSpecs(Map<Long, List<HadoopyShardSpec>> shardSpecs)
{
  this.schema = schema.withTuningConfig(schema.getTuningConfig().withShardSpecs(shardSpecs));
  this.pathSpec = JSON_MAPPER.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);
}
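A sketch of how a caller might assemble the map, assuming the `Long` key is the time bucket's start in millis and using Druid's `DateTimes` utility; the single-shard `NoneShardSpec` setup is illustrative, not taken from this code.

// Illustrative only: one time bucket containing a single, unpartitioned shard.
Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
shardSpecs.put(
    DateTimes.of("2010-01-01").getMillis(),
    Collections.singletonList(new HadoopyShardSpec(NoneShardSpec.instance(), 0))
);
config.setShardSpecs(shardSpecs);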
public GranularitySpec getGranularitySpec()
{
  return schema.getDataSchema().getGranularitySpec();
}
public void addJobProperties(Configuration conf)
{
  for (final Map.Entry<String, String> entry : schema.getTuningConfig().getJobProperties().entrySet()) {
    conf.set(entry.getKey(), entry.getValue());
  }
}
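A hedged sketch of the call site: the `jobProperties` from the tuning config are copied verbatim into the Hadoop `Configuration` before submission, which is how spec-level settings such as `mapreduce.job.user.classpath.first` reach the job.

// Sketch only: create a job and let the spec's properties override its Configuration.
Job job = Job.getInstance(new Configuration(), "druid-index-job");
config.addJobProperties(job.getConfiguration());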
public String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];
  final String hadoopJobIdFile = args[3];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER
      .readValue(
          schema,
          HadoopIngestionSpec.class
      );
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  job = new HadoopDruidDetermineConfigurationJob(config);
  job.setHadoopJobIdFile(hadoopJobIdFile);

  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null)
    );
  } else {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage())
    );
  }
}
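The arguments are positional; a hypothetical invocation (all path values illustrative) that wires in the spec JSON plus the working, output, and job-id-file paths:

// Hypothetical values: spec JSON string plus working, output, and job-id-file paths.
String statusJson = runTask(new String[]{
    specJson,
    "/tmp/druid-indexing",
    "hdfs://namenode/druid/segments",
    "/tmp/hadoop-job-id"
});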
Preconditions.checkArgument(
    this.spec.getIOConfig().getSegmentOutputPath() == null,
    "segmentOutputPath must be absent"
);
Preconditions.checkArgument(this.spec.getTuningConfig().getWorkingPath() == null, "workingPath must be absent");
Preconditions.checkArgument(
    this.spec.getIOConfig().getMetadataUpdateSpec() == null,
    "metadataUpdateSpec must be absent"
);
boolean determineIntervals = !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();

spec = HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
    spec,
    jsonMapper,
    usedSegmentLister // a UsedSegmentLister; the concrete instance is not shown in this fragment
);

Interval interval = JodaUtils.umbrellaInterval(
    JodaUtils.condenseIntervals(
        indexerSchema.getDataSchema().getGranularitySpec().bucketIntervals().get()
    )
);

final String specVersion = indexerSchema.getTuningConfig().getVersion();
if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
  if (specVersion.compareTo(version) < 0) {
    version = specVersion;
  }
}
/**
 * Make the intermediate path for this job run.
 *
 * @return the intermediate path for this job run.
 */
public Path makeIntermediatePath()
{
  return new Path(
      StringUtils.format(
          "%s/%s/%s_%s",
          getWorkingPath(),
          schema.getDataSchema().getDataSource(),
          StringUtils.removeChar(schema.getTuningConfig().getVersion(), ':'),
          schema.getUniqueId()
      )
  );
}
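A worked example of the `%s/%s/%s_%s` format with hypothetical values; `StringUtils.removeChar` strips the colons because `:` is not a legal character in Hadoop path segments.

// Hypothetical inputs: workingPath "/tmp/druid-indexing", dataSource "wikipedia",
// version "2014-10-31T00:00:00.000Z", uniqueId "1".
Path intermediate = config.makeIntermediatePath();
// => /tmp/druid-indexing/wikipedia/2014-10-31T000000.000Z_1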
public boolean isUpdaterJobSpecSet()
{
  return (schema.getIOConfig().getMetadataUpdateSpec() != null);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for "delta-ingestion" use case where we are reading rows stored in Druid as well
    // as late arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
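To make the combining-factory branch concrete: a combining factory aggregates over the already-computed metric column (referenced by its output name) rather than the raw input column, which is what re-reading rows already stored in Druid requires. A sketch using `LongSumAggregatorFactory`, chosen here for illustration only:

// At first ingest: sums the raw input column "bytes" into the metric "totalBytes".
AggregatorFactory atIngest = new LongSumAggregatorFactory("totalBytes", "bytes");
// For rows already stored in Druid: sums the existing "totalBytes" column onto itself.
AggregatorFactory combining = atIngest.getCombiningFactory();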
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}
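A hedged sketch of the call, assuming a reducer that carries the dimension order and column capabilities forward from a previously persisted index; both may be null for the first index of a bucket.

// Sketch only: first index for the bucket, so no prior dimension order to preserve.
IncrementalIndex index = makeIncrementalIndex(bucket, aggregators, config, null, null);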
new HadoopIngestionSpec(
    new DataSchema(
        "foo",
        null,
        new AggregatorFactory[0],
        new UniformGranularitySpec(

Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(
    task.getSpec().getTuningConfig().getJobProperties(),
    task2.getSpec().getTuningConfig().getJobProperties()
);
Assert.assertEquals("blah", task.getClasspathPrefix());
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
    theSchema
        .withTuningConfig(theSchema.getTuningConfig().withVersion(version))
);
public InputRowParser getParser()
{
  return schema.getDataSchema().getParser();
}
public String getWorkingPath()
{
  final String workingPath = schema.getTuningConfig().getWorkingPath();
  return workingPath == null ? DEFAULT_WORKING_PATH : workingPath;
}
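A note on the fallback, with the default value treated as an assumption here: Druid documents `/tmp/druid-indexing` as the default working path, which is what `DEFAULT_WORKING_PATH` is expected to hold.

// Assumed default; confirm against DEFAULT_WORKING_PATH in this class.
String workingPath = config.getWorkingPath();  // "/tmp/druid-indexing" when unset in the spec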
public String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER
      .readValue(
          schema,
          HadoopIngestionSpec.class
      );
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  job = new HadoopDruidDetermineConfigurationJob(config);

  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null)
    );
  } else {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage())
    );
  }
}
int numBackgroundPersistThreads = config.getSchema().getTuningConfig().getNumBackgroundPersistThreads();
if (numBackgroundPersistThreads > 0) {
  final BlockingQueue<Runnable> queue = new SynchronousQueue<>();

final FileSystem outputFS = new Path(config.getSchema().getIOConfig().getSegmentOutputPath())
    .getFileSystem(context.getConfiguration());

config.getDataSource(),
interval,
config.getSchema().getTuningConfig().getVersion(),
null,
ImmutableList.copyOf(allDimensionNames),
mergedBase,
JobHelper.makeFileNamePath(
    new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
    outputFS,
    segmentTemplate,
),
JobHelper.makeFileNamePath(
    new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
    outputFS,
    segmentTemplate,
),
JobHelper.makeTmpPath(
    new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
    outputFS,