private static String getTheDataSource(HadoopIngestionSpec spec)
{
  return spec.getDataSchema().getDataSource();
}

public GranularitySpec getGranularitySpec()
{
  return schema.getDataSchema().getGranularitySpec();
}

public InputRowParser getParser()
{
  return schema.getDataSchema().getParser();
}

public String getDataSource()
{
  return schema.getDataSchema().getDataSource();
}
public Optional<Set<Interval>> getSegmentGranularIntervals()
{
  return Optional.fromNullable(
      (Set<Interval>) schema.getDataSchema()
                            .getGranularitySpec()
                            .bucketIntervals()
                            .orNull()
  );
}
public Optional<List<Interval>> getIntervals()
{
  Optional<SortedSet<Interval>> setOptional = schema.getDataSchema().getGranularitySpec().bucketIntervals();
  if (setOptional.isPresent()) {
    return Optional.of((List<Interval>) JodaUtils.condenseIntervals(setOptional.get()));
  } else {
    return Optional.absent();
  }
}
public List<Interval> getInputIntervals()
{
  return schema.getDataSchema()
               .getGranularitySpec()
               .inputIntervals();
}
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
public void setGranularitySpec(GranularitySpec granularitySpec)
{
  this.schema = schema.withDataSchema(schema.getDataSchema().withGranularitySpec(granularitySpec));
  this.pathSpec = JSON_MAPPER.convertValue(schema.getIOConfig().getPathSpec(), PathSpec.class);
}
this.rollupGran = spec.getDataSchema().getGranularitySpec().getQueryGranularity();
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  Optional<SortedSet<Interval>> intervals = spec.getDataSchema().getGranularitySpec().bucketIntervals();
  if (intervals.isPresent()) {
    Interval interval = JodaUtils.umbrellaInterval(
        JodaUtils.condenseIntervals(
            intervals.get()
        )
    );
    return taskActionClient.submit(new LockTryAcquireAction(TaskLockType.EXCLUSIVE, interval)) != null;
  } else {
    return true;
  }
}
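// Hedged sketch, not part of the source: illustrates how the bucket intervals above are
// condensed and then covered by a single umbrella interval for the exclusive lock. The
// concrete dates are hypothetical; JodaUtils.condenseIntervals merges overlapping or
// abutting intervals, and JodaUtils.umbrellaInterval returns the minimal covering interval.
List<Interval> buckets = ImmutableList.of(
    Intervals.of("2020-01-01/2020-01-02"),
    Intervals.of("2020-01-02/2020-01-03"), // abuts the first bucket
    Intervals.of("2020-01-05/2020-01-06")
);
List<Interval> condensed = JodaUtils.condenseIntervals(buckets);
// condensed    -> [2020-01-01/2020-01-03, 2020-01-05/2020-01-06]
Interval lockInterval = JodaUtils.umbrellaInterval(condensed);
// lockInterval -> 2020-01-01/2020-01-06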
/**
 * Get the proper bucket for some input row.
 *
 * @param inputRow an InputRow
 *
 * @return the Bucket that this row belongs to
 */
public Optional<Bucket> getBucket(InputRow inputRow)
{
  final Optional<Interval> timeBucket = schema.getDataSchema().getGranularitySpec().bucketInterval(
      DateTimes.utc(inputRow.getTimestampFromEpoch())
  );
  if (!timeBucket.isPresent()) {
    return Optional.absent();
  }
  final DateTime bucketStart = timeBucket.get().getStart();
  final ShardSpec actualSpec = shardSpecLookups.get(bucketStart.getMillis())
                                               .getShardSpec(
                                                   rollupGran.bucketStart(inputRow.getTimestamp()).getMillis(),
                                                   inputRow
                                               );
  final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(bucketStart.getMillis()).get(actualSpec);

  return Optional.of(
      new Bucket(
          hadoopyShardSpec.getShardNum(),
          bucketStart,
          actualSpec.getPartitionNum()
      )
  );
}
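// Hedged illustration, not from the source: shows the two granularities involved in the
// bucket lookup above, assuming DAY segment granularity and HOUR query (rollup) granularity.
// The timestamp is hypothetical.
DateTime rowTime = DateTimes.of("2020-01-01T03:25:00Z");
Interval segmentBucket = Granularities.DAY.bucket(rowTime);          // 2020-01-01/2020-01-02, its start keys the shard lookup maps
DateTime shardLookupTime = Granularities.HOUR.bucketStart(rowTime);  // 2020-01-01T03:00:00Z, passed to getShardSpec()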
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  boolean rollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9
      .mergeQueryableIndex(indexes, rollup, aggs, file, config.getIndexSpec(), progressIndicator, null);
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(config.getSchema()
                                                       .getDataSchema()
                                                       .getParser()
                                                       .getParseSpec()
                                                       .getDimensionsSpec());
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(config.getSchema()
                                                       .getDataSchema()
                                                       .getParser()
                                                       .getParseSpec()
                                                       .getDimensionsSpec());
}
/**
 * Make the intermediate path for this job run.
 *
 * @return the intermediate path for this job run.
 */
public Path makeIntermediatePath()
{
  return new Path(
      StringUtils.format(
          "%s/%s/%s_%s",
          getWorkingPath(),
          schema.getDataSchema().getDataSource(),
          StringUtils.removeChar(schema.getTuningConfig().getVersion(), ':'),
          schema.getUniqueId()
      )
  );
}
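// Hedged sketch with hypothetical values, not from the source: shows the shape of the path
// produced by the "%s/%s/%s_%s" format above, with ':' stripped from the version string.
String workingPath = "/tmp/druid-indexing";      // getWorkingPath()
String dataSource = "wikipedia";                 // dataSchema.getDataSource()
String version = "2020-01-01T00:00:00.000Z";     // tuningConfig.getVersion()
String uniqueId = "abc123";                      // schema.getUniqueId(), made-up value
String intermediatePath = String.format(
    "%s/%s/%s_%s",
    workingPath,
    dataSource,
    version.replace(":", ""),                    // same effect as StringUtils.removeChar(version, ':')
    uniqueId
);
// -> /tmp/druid-indexing/wikipedia/2020-01-01T000000.000Z_abc123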
for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
  metricsFields.addAll(agg.requiredFields());
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for "delta-ingestion" use case where we are reading rows stored in Druid as well
    // as late arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(config.getSchema()
                                                       .getDataSchema()
                                                       .getParser()
                                                       .getParseSpec()
                                                       .getDimensionsSpec());
}
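// Hedged illustration with a hypothetical metric, not from the source: why the combining
// factory is used for rows re-read from existing Druid segments in the delta-ingestion case.
// A "count" column stored in a segment is already aggregated, so re-ingesting it must sum
// the stored values rather than count rows again; getCombiningFactory() returns an
// aggregator that does that re-aggregation.
AggregatorFactory ingestTime = new CountAggregatorFactory("count");
AggregatorFactory reaggregating = ingestTime.getCombiningFactory();
// reaggregating behaves like a long-sum over the stored "count" column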
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}