public InputRowParser getParser()
{
  return schema.getDataSchema().getParser();
}

public FirehoseV2 connect(Object metaData) throws IOException
{
  return ioConfig.getFirehoseFactoryV2().connect(dataSchema.getParser(), metaData);
}

public Firehose connect() throws IOException
{
  return ioConfig.getFirehoseFactory().connect(dataSchema.getParser(), null);
}
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
);
this.dataSchema = Preconditions.checkNotNull(dataSchema, "dataSchema");
this.parser = Preconditions.checkNotNull((InputRowParser<ByteBuffer>) dataSchema.getParser(), "parser");
this.tuningConfig = Preconditions.checkNotNull(tuningConfig, "tuningConfig");
this.ioConfig = Preconditions.checkNotNull(ioConfig, "ioConfig");
private static IndexIOConfig createIoConfig(TaskToolbox toolbox, DataSchema dataSchema, Interval interval)
{
  return new IndexIOConfig(
      new IngestSegmentFirehoseFactory(
          dataSchema.getDataSource(),
          interval,
          null, // no filter
          // set dimensions and metrics names to make sure that the generated dataSchema is used for the firehose
          dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
          Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()),
          toolbox.getIndexIO()
      ),
      false
  );
}
@Test
public void testSerdeWithInvalidParserMap() throws Exception
{
  String jsonStr = "{"
                   + "\"dataSource\":\"test\","
                   + "\"parser\":{\"type\":\"invalid\"},"
                   + "\"metricsSpec\":[{\"type\":\"doubleSum\",\"name\":\"metric1\",\"fieldName\":\"col1\"}],"
                   + "\"granularitySpec\":{"
                   + "\"type\":\"arbitrary\","
                   + "\"queryGranularity\":{\"type\":\"duration\",\"duration\":86400000,\"origin\":\"1970-01-01T00:00:00.000Z\"},"
                   + "\"intervals\":[\"2014-01-01T00:00:00.000Z/2015-01-01T00:00:00.000Z\"]}}";

  // No error on serde, because the parser map is converted to an InputRowParser lazily, only when it is actually needed.
  DataSchema schema = jsonMapper.readValue(
      jsonMapper.writeValueAsString(
          jsonMapper.readValue(jsonStr, DataSchema.class)
      ),
      DataSchema.class
  );

  expectedException.expect(CoreMatchers.instanceOf(IllegalArgumentException.class));
  expectedException.expectCause(CoreMatchers.instanceOf(JsonMappingException.class));
  expectedException.expectMessage(
      "Instantiation of [simple type, class org.apache.druid.data.input.impl.StringInputRowParser] value failed: parseSpec"
  );

  // Jackson creates a default-type parser (StringInputRowParser) for an invalid type, which then fails on the missing parseSpec.
  schema.getParser();
}
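// Illustrative sketch, not part of the test above: the laziness the test relies on amounts to
// DataSchema keeping the parser as a Map and only materializing it when getParser() is called,
// roughly via a Jackson convertValue call (an assumption here; jsonMapper is taken to be the
// same configured mapper used in the test, and the class/variable names below are illustrative).
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.druid.data.input.impl.InputRowParser;

class LazyParserConversionSketch
{
  static InputRowParser convert(ObjectMapper jsonMapper)
  {
    Map<String, Object> parserMap = ImmutableMap.of("type", "invalid");
    // Round-tripping a DataSchema holding this map succeeds; only this conversion throws the
    // IllegalArgumentException (wrapping a JsonMappingException) that the test expects.
    return jsonMapper.convertValue(parserMap, InputRowParser.class);
  }
}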
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
final Firehose firehose = firehoseFactory.connect(ingestionSchema.getDataSchema().getParser(), firehoseTempDir) ) {
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for the "delta-ingestion" use case, where we are reading rows stored in Druid
    // as well as late-arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }

  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
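// Illustrative sketch, separate from the mapper above: why the combining factory is used for rows
// that Druid has already aggregated (the delta-ingestion case). For a count aggregator the
// combining factory sums the stored counts instead of counting input rows again; the class and
// variable names here are illustrative only.
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;

class CombiningFactorySketch
{
  static AggregatorFactory forSegmentRows()
  {
    AggregatorFactory atIngestionTime = new CountAggregatorFactory("rows");
    // Segment rows already hold partial counts in "rows", so merging must sum that column
    // rather than count the re-read rows again.
    return atIngestionTime.getCombiningFactory();
  }
}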
final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema, tuningConfig);
final BatchAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator);
final Firehose firehose = firehoseFactory.connect(dataSchema.getParser(), firehoseTempDir)
) {
  driver.startJob();
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}
jsonMapper ); schema.getParser();
actual.getParser().getParseSpec(), new JSONParseSpec( new TimestampSpec("xXx", null, null),
jsonMapper ); schema.getParser();
schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions() );
@Test
public void testDefaultExclusions()
{
  Map<String, Object> parser = jsonMapper.convertValue(
      new StringInputRowParser(
          new JSONParseSpec(
              new TimestampSpec("time", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dimB", "dimA")), null, null),
              null,
              null
          ),
          null
      ),
      JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
  );

  DataSchema schema = new DataSchema(
      "test",
      parser,
      new AggregatorFactory[]{
          new DoubleSumAggregatorFactory("metric1", "col1"),
          new DoubleSumAggregatorFactory("metric2", "col2"),
      },
      new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
      null,
      jsonMapper
  );

  // The timestamp column, the metric input columns, and the metric names are excluded from dimensions by default.
  Assert.assertEquals(
      ImmutableSet.of("time", "col1", "col2", "metric1", "metric2"),
      schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
  );
}
final StringInputRowParser parser = (StringInputRowParser) schema.getParser();