/** Creates a builder backed by a fresh, empty {@link Schema}. */
public SchemaBuilder() { _schema = new Schema(); }
/**
 * Builds a {@link Schema} containing one field spec for every column declared in the
 * generator spec.
 *
 * @return the assembled schema
 */
public Schema fetchSchema() {
  final Schema result = new Schema();
  for (final String columnName : genSpec.getColumns()) {
    result.addField(buildSpec(genSpec, columnName));
  }
  return result;
}
/**
 * For segments on disk.
 * <p>Index directory passed in should be top level segment directory.
 * <p>If segment metadata file exists in multiple segment version, load the one in highest segment version.
 */
public SegmentMetadataImpl(File indexDir)
    throws IOException {
  _indexDir = indexDir;
  // Load the metadata properties file for this segment directory.
  PropertiesConfiguration segmentMetadataPropertiesConfiguration = getPropertiesConfiguration(indexDir);
  _columnMetadataMap = new HashMap<>();
  _allColumns = new HashSet<>();
  _schema = new Schema();
  // Populate column metadata, columns and schema from the loaded properties.
  init(segmentMetadataPropertiesConfiguration);
  // Creation metadata is optional; only load it when the file is present.
  File creationMetaFile = SegmentDirectoryPaths.findCreationMetaFile(indexDir);
  if (creationMetaFile != null) {
    loadCreationMeta(creationMetaFile);
  }
  setTimeInfo(segmentMetadataPropertiesConfiguration);
  _totalDocs = segmentMetadataPropertiesConfiguration.getInt(SEGMENT_TOTAL_DOCS);
  // Raw doc count falls back to the total doc count when the raw-docs key is absent.
  _totalRawDocs = segmentMetadataPropertiesConfiguration.getInt(SEGMENT_TOTAL_RAW_DOCS, _totalDocs);
}
/**
 * Helper method to build the schema for the aggregation tests: {@code NUM_METRIC_COLUMNS}
 * DOUBLE metric columns. Each generated column name is also recorded into {@code _columns}.
 *
 * @return schema containing the metric columns
 */
private Schema buildSchema() {
  Schema testSchema = new Schema();
  for (int index = 0; index < NUM_METRIC_COLUMNS; index++) {
    String columnName = METRIC_PREFIX + index;
    testSchema.addField(new MetricFieldSpec(columnName, FieldSpec.DataType.DOUBLE));
    _columns[index] = columnName;
  }
  return testSchema;
}
/** * Returns a new schema based on the original one. The new schema removes columns as needed (for ex, virtual cols) * and adds the new timespec to the schema. */ @VisibleForTesting public Schema getUpdatedSchema(Schema original) { TimeFieldSpec tfs = original.getTimeFieldSpec(); // Use outgoing granularity for creating segment TimeGranularitySpec outgoing = tfs.getOutgoingGranularitySpec(); TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing); Schema newSchema = new Schema(); newSchema.addField(newTimeSpec); for (String col : original.getPhysicalColumnNames()) { if (!col.equals(tfs.getName())) { newSchema.addField(original.getFieldSpecFor(col)); } } return newSchema; } }
/**
 * Setup to build a segment with raw indexes (no-dictionary) of various data types.
 *
 * @throws Exception on any failure while building or loading the segment
 */
@BeforeClass
public void setup()
    throws Exception {
  _schema = new Schema();
  // Every column is a single-value BYTES dimension; the column constants decide which ones
  // are sorted / dictionary-encoded downstream.
  for (String column : new String[]{FIXED_BYTE_SORTED_COLUMN, FIXED_BYTES_UNSORTED_COLUMN,
      FIXED_BYTES_NO_DICT_COLUMN, VARIABLE_BYTES_COLUMN}) {
    _schema.addField(new DimensionFieldSpec(column, FieldSpec.DataType.BYTES, true));
  }
  _random = new Random(System.nanoTime());
  _recordReader = buildIndex(_schema);
  _segment = ImmutableSegmentLoader.load(new File(SEGMENT_DIR_NAME, SEGMENT_NAME), ReadMode.heap);
}
/** Verifies that built-in virtual columns are excluded when the schema is updated for conversion. */
@Test
public void testNoVirtualColumnsInSchema() {
  Schema inputSchema = new Schema();
  inputSchema.addField(new DimensionFieldSpec("col1", FieldSpec.DataType.STRING, true));
  inputSchema.addField(new TimeFieldSpec("col1", FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, "col2",
      FieldSpec.DataType.LONG, TimeUnit.DAYS));
  VirtualColumnProviderFactory.addBuiltInVirtualColumnsToSchema(inputSchema);
  // 2 declared columns plus the built-in virtual columns = 5 (per the assertion below).
  Assert.assertEquals(inputSchema.getColumnNames().size(), 5);
  Assert.assertEquals(inputSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(),
      TimeUnit.MILLISECONDS);
  RealtimeSegmentConverter converter =
      new RealtimeSegmentConverter(null, "", inputSchema, "testTable", "col1", "segment1", "col1");
  Schema updatedSchema = converter.getUpdatedSchema(inputSchema);
  // Virtual columns must be dropped; only the physical and time columns remain.
  Assert.assertEquals(updatedSchema.getColumnNames().size(), 2);
  Assert.assertEquals(updatedSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.DAYS);
}
}
/**
 * This test generates an avro with TDigest BYTES data, and tests segment generation.
 *
 * <p>The fixed-length BYTES metric is expected to be dictionary-encoded and the variable-length
 * one is not; every row read back must match the values written into the avro.
 */
@Test
public void testTDigestAvro()
    throws Exception {
  Schema schema = new Schema();
  schema.addField(new MetricFieldSpec(FIXED_BYTES_UNSORTED_COLUMN, FieldSpec.DataType.BYTES));
  schema.addField(new MetricFieldSpec(VARIABLE_BYTES_COLUMN, FieldSpec.DataType.BYTES));

  // Locals renamed: the '_' prefix is reserved for members in this codebase.
  List<byte[]> fixedExpected = new ArrayList<>(NUM_ROWS);
  List<byte[]> varExpected = new ArrayList<>(NUM_ROWS);
  buildAvro(schema, fixedExpected, varExpected);

  IndexSegment segment = buildSegmentFromAvro(schema, AVRO_DIR_NAME, AVRO_NAME, SEGMENT_NAME);
  SegmentMetadata metadata = segment.getSegmentMetadata();
  Assert.assertTrue(metadata.hasDictionary(FIXED_BYTES_UNSORTED_COLUMN));
  Assert.assertFalse(metadata.hasDictionary(VARIABLE_BYTES_COLUMN));

  // Fix: close the record reader (RecordReader extends Closeable); the original leaked it.
  try (PinotSegmentRecordReader reader = new PinotSegmentRecordReader(new File(AVRO_DIR_NAME, SEGMENT_NAME))) {
    GenericRow row = new GenericRow();
    int i = 0;
    while (reader.hasNext()) {
      row = reader.next(row);
      Assert.assertEquals(
          ByteArray.compare((byte[]) row.getValue(FIXED_BYTES_UNSORTED_COLUMN), fixedExpected.get(i)), 0);
      Assert.assertEquals(ByteArray.compare((byte[]) row.getValue(VARIABLE_BYTES_COLUMN), varExpected.get(i++)), 0);
    }
  }
  segment.destroy();
}
@Test public void testByteType() throws DecoderException, IOException { Schema expectedSchema = new Schema(); byte[] expectedEmptyDefault = new byte[0]; byte[] expectedNonEmptyDefault = Hex.decodeHex("abcd1234".toCharArray()); expectedSchema.setSchemaName("test"); expectedSchema.addField(new MetricFieldSpec("noDefault", FieldSpec.DataType.BYTES)); expectedSchema.addField(new MetricFieldSpec("emptyDefault", FieldSpec.DataType.BYTES, expectedEmptyDefault)); expectedSchema.addField(new MetricFieldSpec("nonEmptyDefault", FieldSpec.DataType.BYTES, expectedNonEmptyDefault)); // Ensure that schema can be serialized and de-serialized (ie byte[] converted to String and back). String jsonSchema = expectedSchema.getJSONSchema(); Schema actualSchema = Schema.fromString(jsonSchema); Assert.assertEquals(actualSchema.getFieldSpecFor("noDefault").getDefaultNullValue(), expectedEmptyDefault); Assert.assertEquals(actualSchema.getFieldSpecFor("emptyDefault").getDefaultNullValue(), expectedEmptyDefault); Assert.assertEquals(actualSchema.getFieldSpecFor("nonEmptyDefault").getDefaultNullValue(), expectedNonEmptyDefault); Assert.assertEquals(actualSchema, expectedSchema); Assert.assertEquals(actualSchema.hashCode(), expectedSchema.hashCode()); } }
private void setupRealtimeTable() throws IOException { // Set up the realtime table. Map<String, String> streamConfigs = new HashMap<>(); streamConfigs.put("streamType", "kafka"); streamConfigs.put("stream.kafka.consumer.type", "highLevel"); streamConfigs.put("stream.kafka.topic.name", "kafkaTopic"); streamConfigs .put("stream.kafka.decoder.class.name", "org.apache.pinot.core.realtime.impl.kafka.KafkaAvroMessageDecoder"); streamConfigs.put("stream.kafka.hlc.zk.connect.string", "localhost:1111/zkConnect"); streamConfigs.put("stream.kafka.decoder.prop.schema.registry.rest.url", "http://localhost:2222/schemaRegistry"); TableConfig realtimeTimeConfig = new TableConfig.Builder(CommonConstants.Helix.TableType.REALTIME).setTableName(RAW_DINING_TABLE_NAME) .setTimeColumnName("timeColumn").setTimeType("DAYS"). setStreamConfigs(streamConfigs).build(); Schema schema = new Schema(); schema.setSchemaName(RAW_DINING_TABLE_NAME); _pinotResourceManager.addOrUpdateSchema(schema); // Fake an PinotLLCRealtimeSegmentManager instance: required for a realtime table creation. PinotLLCRealtimeSegmentManager .create(_pinotResourceManager, new ControllerConf(), new ControllerMetrics(new MetricsRegistry())); _pinotResourceManager.addTable(realtimeTimeConfig); _helixBrokerStarter.getHelixExternalViewBasedRouting() .markDataResourceOnline(realtimeTimeConfig, null, new ArrayList<InstanceConfig>()); }
/**
 * Builds a test schema with one string dimension and one time column.
 *
 * @param isSimpleDate whether the time column holds simple dates (INT days) instead of epoch millis (LONG)
 * @return the constructed schema
 */
private Schema createSchema(boolean isSimpleDate) {
  Schema result = new Schema();
  result.addField(new DimensionFieldSpec(STRING_COL_NAME, FieldSpec.DataType.STRING, true));
  TimeFieldSpec timeSpec = isSimpleDate
      ? new TimeFieldSpec(TIME_COL_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS)
      : new TimeFieldSpec(TIME_COL_NAME, FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS);
  result.addField(timeSpec);
  return result;
}
/**
 * Setup to build a segment with raw indexes (no-dictionary) of various data types.
 *
 * @throws Exception on index build failure
 */
@BeforeClass
public void setup()
    throws Exception {
  // One single-value dimension per primitive data type under test.
  Schema testSchema = new Schema();
  testSchema.addField(new DimensionFieldSpec(INT_COLUMN, FieldSpec.DataType.INT, true));
  testSchema.addField(new DimensionFieldSpec(LONG_COLUMN, FieldSpec.DataType.LONG, true));
  testSchema.addField(new DimensionFieldSpec(FLOAT_COLUMN, FieldSpec.DataType.FLOAT, true));
  testSchema.addField(new DimensionFieldSpec(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE, true));
  testSchema.addField(new DimensionFieldSpec(STRING_COLUMN, FieldSpec.DataType.STRING, true));
  _random = new Random(System.nanoTime());
  _recordReader = buildIndex(testSchema);
}
@Test public void testTimeConversion() { // When incoming time exists and is valid, do the conversion Schema schema = new Schema(); schema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.INT, TimeUnit.DAYS, "incoming"), new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, "outgoing"))); TimeTransformer transformer = new TimeTransformer(schema); GenericRow record = new GenericRow(); record.putField("incoming", TimeUnit.MILLISECONDS.toDays(VALID_TIME)); record = transformer.transform(record); assertNotNull(record); assertEquals(record.getValue("outgoing"), VALID_TIME); // When incoming and outgoing time column is the same, and the value is not yet converted, do the conversion schema = new Schema(); schema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.INT, TimeUnit.DAYS, "time"), new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, "time"))); transformer = new TimeTransformer(schema); record = new GenericRow(); record.putField("time", TimeUnit.MILLISECONDS.toDays(VALID_TIME)); record = transformer.transform(record); assertNotNull(record); assertEquals(record.getValue("time"), VALID_TIME); } }
@Test public void testTimeFormat() { // When incoming and outgoing spec are the same, any time format should work Schema schema = new Schema(); schema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.INT, TimeUnit.DAYS, TimeGranularitySpec.TimeFormat.SIMPLE_DATE_FORMAT.toString(), "time"))); TimeTransformer transformer = new TimeTransformer(schema); GenericRow record = new GenericRow(); record.putField("time", 20180101); record = transformer.transform(record); assertNotNull(record); assertEquals(record.getValue("time"), 20180101); // When incoming and outgoing spec are not the same, simple date format is not allowed schema = new Schema(); schema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.INT, TimeUnit.DAYS, TimeGranularitySpec.TimeFormat.SIMPLE_DATE_FORMAT.toString(), "incoming"), new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.SECONDS, "outgoing"))); try { new TimeTransformer(schema); fail(); } catch (IllegalArgumentException e) { // Expected } }
/**
 * Creates a minimal schema (two string dimensions, two metrics) named after the given table.
 *
 * @param tableName name to assign to the schema
 * @return the dummy schema
 */
protected Schema createDummySchema(String tableName) {
  Schema dummySchema = new Schema();
  dummySchema.setSchemaName(tableName);
  dummySchema.addField(new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true, ""));
  dummySchema.addField(new DimensionFieldSpec("dimB", FieldSpec.DataType.STRING, true, 0));
  dummySchema.addField(new MetricFieldSpec("metricA", FieldSpec.DataType.INT, 0));
  dummySchema.addField(new MetricFieldSpec("metricB", FieldSpec.DataType.DOUBLE, -1));
  return dummySchema;
}
// Build a single-column schema so the stats collector can be configured for this column.
Schema schema = new Schema();
schema.addField(new DimensionFieldSpec(column, dataType, true));
// NOTE(review): the second constructor argument is passed as null here — presumably an
// optional config (e.g. partitioning); confirm against StatsCollectorConfig's signature.
StatsCollectorConfig statsCollectorConfig = new StatsCollectorConfig(schema, null);
/**
 * Helper method to build a schema with one single-value dimension column per data type
 * (INT, LONG, FLOAT, DOUBLE, STRING).
 *
 * <p>NOTE(review): the previous Javadoc said "metric columns", but the code adds
 * dimension field specs — the doc is corrected here, the code is unchanged.
 *
 * @return Schema containing the dimension columns
 */
private static Schema buildSchema() {
  Schema schema = new Schema();
  schema.addField(new DimensionFieldSpec(INT_COLUMN, FieldSpec.DataType.INT, true));
  schema.addField(new DimensionFieldSpec(LONG_COLUMN, FieldSpec.DataType.LONG, true));
  schema.addField(new DimensionFieldSpec(FLOAT_COLUMN, FieldSpec.DataType.FLOAT, true));
  schema.addField(new DimensionFieldSpec(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE, true));
  schema.addField(new DimensionFieldSpec(STRING_COLUMN, FieldSpec.DataType.STRING, true));
  return schema;
}
}
/**
 * Builds a test schema with two string dimensions, two metrics and the supplied time spec.
 *
 * @param timeSpec time field spec to attach to the schema
 * @return the assembled schema
 */
private Schema createPinotSchemaWithTimeSpec(TimeFieldSpec timeSpec) {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");
  // Each spec is added directly instead of going through a reused local variable.
  pinotSchema.addField(new DimensionFieldSpec(D1, DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D2, DataType.STRING, true));
  pinotSchema.addField(new MetricFieldSpec(M1, DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, DataType.FLOAT));
  pinotSchema.addField(timeSpec);
  return pinotSchema;
}
/**
 * Builds a test schema with single- and multi-value string dimensions, two metrics and an
 * hourly LONG time column.
 *
 * @return the assembled schema
 */
private Schema createPinotSchema() {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");
  pinotSchema.addField(new DimensionFieldSpec(D_SV_1, DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D_MV_1, FieldSpec.DataType.STRING, false));
  pinotSchema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.FLOAT));
  pinotSchema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.HOURS, TIME)));
  return pinotSchema;
}
/**
 * Builds a test schema with three dimensions (two single-value, one multi-value), two
 * metrics and an hourly LONG time column.
 *
 * @return the assembled schema
 */
private Schema createPinotSchema() {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");
  pinotSchema.addField(new DimensionFieldSpec(D_SV_1, FieldSpec.DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D_SV_2, FieldSpec.DataType.INT, true));
  pinotSchema.addField(new DimensionFieldSpec(D_MV_1, FieldSpec.DataType.STRING, false));
  pinotSchema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.FLOAT));
  pinotSchema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.HOURS, TIME)));
  return pinotSchema;
}