public InputRowParser getParser()
{
  return schema.getDataSchema().getParser();
}

public FirehoseV2 connect(Object metaData) throws IOException
{
  return ioConfig.getFirehoseFactoryV2().connect(dataSchema.getParser(), metaData);
}

public Firehose connect() throws IOException
{
  return ioConfig.getFirehoseFactory().connect(dataSchema.getParser(), null);
}
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
);
this.dataSchema = Preconditions.checkNotNull(dataSchema, "dataSchema");
this.parser = Preconditions.checkNotNull((InputRowParser<ByteBuffer>) dataSchema.getParser(), "parser");
this.tuningConfig = Preconditions.checkNotNull(tuningConfig, "tuningConfig");
this.ioConfig = Preconditions.checkNotNull(ioConfig, "ioConfig");
private static IndexIOConfig createIoConfig(TaskToolbox toolbox, DataSchema dataSchema, Interval interval)
{
  return new IndexIOConfig(
      new IngestSegmentFirehoseFactory(
          dataSchema.getDataSource(),
          interval,
          null, // no filter
          // set dimensions and metrics names to make sure that the generated dataSchema is used for the firehose
          dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
          Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()),
          toolbox.getIndexIO()
      ),
      false
  );
}
@Test
public void testSerdeWithInvalidParserMap() throws Exception
{
  String jsonStr = "{"
                   + "\"dataSource\":\"test\","
                   + "\"parser\":{\"type\":\"invalid\"},"
                   + "\"metricsSpec\":[{\"type\":\"doubleSum\",\"name\":\"metric1\",\"fieldName\":\"col1\"}],"
                   + "\"granularitySpec\":{"
                   + "\"type\":\"arbitrary\","
                   + "\"queryGranularity\":{\"type\":\"duration\",\"duration\":86400000,\"origin\":\"1970-01-01T00:00:00.000Z\"},"
                   + "\"intervals\":[\"2014-01-01T00:00:00.000Z/2015-01-01T00:00:00.000Z\"]}}";

  // No error on serde, because the parser map is converted to an InputRowParser lazily, only when it is actually needed.
  DataSchema schema = jsonMapper.readValue(
      jsonMapper.writeValueAsString(
          jsonMapper.readValue(jsonStr, DataSchema.class)
      ),
      DataSchema.class
  );

  expectedException.expect(CoreMatchers.instanceOf(IllegalArgumentException.class));
  expectedException.expectCause(CoreMatchers.instanceOf(JsonMappingException.class));
  expectedException.expectMessage(
      "Instantiation of [simple type, class org.apache.druid.data.input.impl.StringInputRowParser] value failed: parseSpec"
  );

  // Jackson creates a default-type parser (StringInputRowParser) for an invalid type, which then fails on the missing parseSpec.
  schema.getParser();
}
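// Illustrative sketch, not part of the test above: the laziness the test relies on amounts to
// DataSchema keeping the parser as a Map and only materializing it when getParser() is called,
// roughly via a Jackson convertValue call (an assumption here; jsonMapper is taken to be the
// same configured mapper used in the test, and the class/variable names below are illustrative).
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.druid.data.input.impl.InputRowParser;

class LazyParserConversionSketch
{
  static InputRowParser convert(ObjectMapper jsonMapper)
  {
    Map<String, Object> parserMap = ImmutableMap.of("type", "invalid");
    // Round-tripping a DataSchema holding this map succeeds; only this conversion throws the
    // IllegalArgumentException (wrapping a JsonMappingException) that the test expects.
    return jsonMapper.convertValue(parserMap, InputRowParser.class);
  }
}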
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
final Firehose firehose = firehoseFactory.connect(ingestionSchema.getDataSchema().getParser(), firehoseTempDir) ) {
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for the "delta-ingestion" use case, where we are reading rows stored in Druid
    // as well as late-arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }

  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
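// Illustrative sketch, separate from the mapper above: why the combining factory is used for rows
// that Druid has already aggregated (the delta-ingestion case). For a count aggregator the
// combining factory sums the stored counts instead of counting input rows again; the class and
// variable names here are illustrative only.
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;

class CombiningFactorySketch
{
  static AggregatorFactory forSegmentRows()
  {
    AggregatorFactory atIngestionTime = new CountAggregatorFactory("rows");
    // Segment rows already hold partial counts in "rows", so merging must sum that column
    // rather than count the re-read rows again.
    return atIngestionTime.getCombiningFactory();
  }
}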
final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema, tuningConfig);
final BatchAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator);
final Firehose firehose = firehoseFactory.connect(dataSchema.getParser(), firehoseTempDir)
) {
  driver.startJob();
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}
jsonMapper ); schema.getParser();
actual.getParser().getParseSpec(), new JSONParseSpec( new TimestampSpec("xXx", null, null),
jsonMapper ); schema.getParser();
schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions() );
@Test
public void testDefaultExclusions()
{
  Map<String, Object> parser = jsonMapper.convertValue(
      new StringInputRowParser(
          new JSONParseSpec(
              new TimestampSpec("time", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dimB", "dimA")), null, null),
              null,
              null
          ),
          null
      ),
      JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
  );

  DataSchema schema = new DataSchema(
      "test",
      parser,
      new AggregatorFactory[]{
          new DoubleSumAggregatorFactory("metric1", "col1"),
          new DoubleSumAggregatorFactory("metric2", "col2"),
      },
      new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
      null,
      jsonMapper
  );

  // The timestamp column, the metric input columns, and the metric names are excluded from dimensions by default.
  Assert.assertEquals(
      ImmutableSet.of("time", "col1", "col2", "metric1", "metric2"),
      schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
  );
}
final StringInputRowParser parser = (StringInputRowParser) schema.getParser();