public Builder withTimestampSpec(InputRowParser parser)
{
  if (parser != null && parser.getParseSpec() != null && parser.getParseSpec().getTimestampSpec() != null) {
    this.timestampSpec = parser.getParseSpec().getTimestampSpec();
  } else {
    // Fall back to the default spec (column "timestamp", "auto" format).
    this.timestampSpec = new TimestampSpec(null, null, null);
  }
  return this;
}
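A usage sketch, assuming this builder is IncrementalIndexSchema.Builder and that "parser" comes from the ingestion spec; a null parser or parse spec falls back to the default TimestampSpec:

IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
    .withTimestampSpec(parser) // the InputRowParser overload shown above
    .build();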
@VisibleForTesting
static String typeStringFromParseSpec(ParseSpec parseSpec)
{
  StringBuilder builder = new StringBuilder("struct<");
  builder.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string");
  // the typeString seems positionally dependent, so a repeated timestamp column causes incorrect mapping
  if (parseSpec.getDimensionsSpec().getDimensionNames().size() > 0) {
    builder.append(",");
    builder.append(String.join(
        ":string,",
        parseSpec.getDimensionsSpec()
                 .getDimensionNames()
                 .stream()
                 .filter(s -> !s.equals(parseSpec.getTimestampSpec().getTimestampColumn()))
                 .collect(Collectors.toList())
    ));
    builder.append(":string");
  }
  builder.append(">");
  return builder.toString();
}
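A minimal worked example, assuming a TimeAndDimsParseSpec with the hypothetical columns "ts", "dim1", and "dim2"; a dimension that repeated the timestamp column would be filtered out of the struct:

ParseSpec spec = new TimeAndDimsParseSpec(
    new TimestampSpec("ts", "auto", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2")), null, null)
);
String typeString = typeStringFromParseSpec(spec);
// typeString -> "struct<ts:string,dim1:string,dim2:string>"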
@Override
public List<InputRow> parseBatch(Map<String, Object> theMap)
{
  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    // No explicit dimensions configured: use every key that is not excluded.
    dimensions = Lists.newArrayList(
        Sets.difference(theMap.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }

  final DateTime timestamp;
  try {
    timestamp = parseSpec.getTimestampSpec().extractTimestamp(theMap);
    if (timestamp == null) {
      final String input = theMap.toString();
      throw new NullPointerException(
          StringUtils.format(
              "Null timestamp in input: %s",
              input.length() < 100 ? input : input.substring(0, 100) + "..."
          )
      );
    }
  }
  catch (Exception e) {
    throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap);
  }

  return ImmutableList.of(new MapBasedInputRow(timestamp, dimensions, theMap));
}
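A usage sketch with a hypothetical event map; "parser" stands in for whatever object hosts this parseBatch. With no configured dimensions, every key not listed in dimensionExclusions becomes a dimension:

Map<String, Object> event = ImmutableMap.of(
    "timestamp", "2018-01-01T00:00:00Z",
    "page", "Main_Page",
    "count", 1
);
InputRow row = parser.parseBatch(event).get(0);
// row.getTimestamp() -> 2018-01-01T00:00:00.000Z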
@JsonCreator
public ParquetAvroHadoopInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("binaryAsString") Boolean binaryAsString
)
{
  this.parseSpec = parseSpec;
  this.timestampSpec = parseSpec.getTimestampSpec();
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.binaryAsString = binaryAsString == null ? false : binaryAsString;

  // instanceof is null-safe, so the original explicit null check was redundant;
  // parseSpec is already dereferenced above, so it must be non-null here anyway.
  final JSONPathSpec flattenSpec;
  if (parseSpec instanceof AvroParseSpec) {
    flattenSpec = ((AvroParseSpec) parseSpec).getFlattenSpec();
  } else {
    flattenSpec = JSONPathSpec.DEFAULT;
  }

  this.recordFlattener = ObjectFlatteners.create(
      flattenSpec,
      new AvroFlattenerMaker(false, this.binaryAsString)
  );
}
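A minimal construction sketch with hypothetical field names, assuming the AvroParseSpec constructor takes (timestampSpec, dimensionsSpec, flattenSpec); passing a non-Avro parse spec would select JSONPathSpec.DEFAULT instead:

InputRowParser parser = new ParquetAvroHadoopInputRowParser(
    new AvroParseSpec(
        new TimestampSpec("ts", "auto", null),
        new DimensionsSpec(null, null, null),
        null // flattenSpec left unset in this sketch
    ),
    null // binaryAsString defaults to false
);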
public void verify()
{
  Preconditions.checkNotNull(schema.getDataSchema().getDataSource(), "dataSource");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec(), "parseSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getParser().getParseSpec().getTimestampSpec(), "timestampSpec");
  Preconditions.checkNotNull(schema.getDataSchema().getGranularitySpec(), "granularitySpec");
  Preconditions.checkNotNull(pathSpec, "inputSpec");
  Preconditions.checkNotNull(schema.getTuningConfig().getWorkingPath(), "workingPath");
  Preconditions.checkNotNull(schema.getIOConfig().getSegmentOutputPath(), "segmentOutputPath");
  Preconditions.checkNotNull(schema.getTuningConfig().getVersion(), "version");
}
// Fragment; the enclosing call is assumed to be a MapBasedInputRow construction,
// matching the parseBatch pattern above.
return new MapBasedInputRow(
    parseSpec.getTimestampSpec().extractTimestamp(record),
    dimensions,
    record
);
final TimestampSpec timestampSpec = inputRowParser.getParseSpec().getTimestampSpec();
String tsField = parseSpec.getTimestampSpec().getTimestampColumn();
String tsField = config.getParser().getParseSpec().getTimestampSpec().getTimestampColumn();
TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
DateTime dateTime = timestampSpec.extractTimestamp(map);
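A sketch with a hypothetical map, assuming the spec's column is "timestamp" with the "auto" format:

Map<String, Object> map = ImmutableMap.of("timestamp", "2018-01-01T00:00:00Z", "page", "Main_Page");
DateTime dateTime = parseSpec.getTimestampSpec().extractTimestamp(map);
// dateTime -> 2018-01-01T00:00:00.000Z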
@Test
public void testSerde() throws IOException
{
  final String json = "{"
                      + "\"format\":\"timeAndDims\", "
                      + "\"timestampSpec\": {\"column\":\"timestamp\"}, "
                      + "\"dimensionsSpec\":{}"
                      + "}";

  final Object mapValue = mapper.readValue(json, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
  final ParseSpec parseSpec = mapper.convertValue(mapValue, ParseSpec.class);

  Assert.assertEquals(TimeAndDimsParseSpec.class, parseSpec.getClass());
  Assert.assertEquals("timestamp", parseSpec.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals(ImmutableList.of(), parseSpec.getDimensionsSpec().getDimensionNames());

  // Test round-trip.
  Assert.assertEquals(
      parseSpec,
      mapper.readValue(mapper.writeValueAsString(parseSpec), ParseSpec.class)
  );
}
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }
  return newIndex;
}
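A hypothetical follow-up showing how the returned index is typically consumed; "bucket", "aggs", "config", and "rows" are assumed from the surrounding job:

IncrementalIndex index = makeIncrementalIndex(bucket, aggs, config, null, null);
for (InputRow row : rows) {
  index.add(row); // may throw IndexSizeExceededException once maxRowCount is reached
}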
@Override
public List<InputRow> parseBatch(Map<String, Object> theMap)
{
  final List<String> dimensions = parseSpec.getDimensionsSpec().hasCustomDimensions()
                                  ? parseSpec.getDimensionsSpec().getDimensionNames()
                                  : Lists.newArrayList(
                                      Sets.difference(
                                          theMap.keySet(),
                                          parseSpec.getDimensionsSpec().getDimensionExclusions()
                                      )
                                  );

  final DateTime timestamp;
  try {
    timestamp = parseSpec.getTimestampSpec().extractTimestamp(theMap);
    if (timestamp == null) {
      final String input = theMap.toString();
      throw new NullPointerException(
          StringUtils.format(
              "Null timestamp in input: %s",
              input.length() < 100 ? input : input.substring(0, 100) + "..."
          )
      );
    }
  }
  catch (Exception e) {
    throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap);
  }

  return ImmutableList.of(new MapBasedInputRow(timestamp.getMillis(), dimensions, theMap));
}