/**
 * Adds a single-value metric field with an explicit default null value to the schema under construction.
 *
 * @param metricName name of the metric column
 * @param dataType data type of the metric column
 * @param defaultNullValue value substituted when the metric value is missing
 * @return this builder, for call chaining
 */
public SchemaBuilder addMetric(@Nonnull String metricName, @Nonnull DataType dataType, @Nonnull Object defaultNullValue) {
  _schema.addField(new MetricFieldSpec(metricName, dataType, defaultNullValue));
  return this;
}
/**
 * Adds a derived metric field (e.g. HLL) with the given serialized field size to the schema under
 * construction.
 *
 * <p>Parameter renamed from {@code name} to {@code metricName} for consistency with the other
 * {@code addMetric} overloads; Java has no named arguments, so callers are unaffected.
 *
 * @param metricName name of the metric column
 * @param dataType data type of the metric column
 * @param fieldSize serialized size of the derived metric value
 * @param derivedMetricType type of the derived metric (e.g. HLL)
 * @return this builder, for call chaining
 */
public SchemaBuilder addMetric(@Nonnull String metricName, @Nonnull DataType dataType, int fieldSize, @Nonnull MetricFieldSpec.DerivedMetricType derivedMetricType) {
  _schema.addField(new MetricFieldSpec(metricName, dataType, fieldSize, derivedMetricType));
  return this;
}
/**
 * Adds a derived metric field (e.g. HLL) with the given serialized field size and an explicit
 * default null value to the schema under construction.
 *
 * <p>Parameter renamed from {@code name} to {@code metricName} for consistency with the other
 * {@code addMetric} overloads; Java has no named arguments, so callers are unaffected.
 *
 * @param metricName name of the metric column
 * @param dataType data type of the metric column
 * @param fieldSize serialized size of the derived metric value
 * @param derivedMetricType type of the derived metric (e.g. HLL)
 * @param defaultNullValue value substituted when the metric value is missing
 * @return this builder, for call chaining
 */
public SchemaBuilder addMetric(@Nonnull String metricName, @Nonnull DataType dataType, int fieldSize, @Nonnull MetricFieldSpec.DerivedMetricType derivedMetricType, @Nonnull Object defaultNullValue) {
  _schema.addField(new MetricFieldSpec(metricName, dataType, fieldSize, derivedMetricType, defaultNullValue));
  return this;
}
/**
 * Adds a single-value metric field (with the field spec's built-in default null value) to the
 * schema under construction.
 *
 * @param metricName name of the metric column
 * @param dataType data type of the metric column
 * @return this builder, for call chaining
 */
public SchemaBuilder addMetric(@Nonnull String metricName, @Nonnull DataType dataType) {
  _schema.addField(new MetricFieldSpec(metricName, dataType));
  return this;
}
/**
 * Adds the derived HLL fields configured in the {@code HllConfig} to the data schema when HLL
 * index creation is enabled. Each derived field is stored as a STRING metric of the configured
 * HLL field size.
 *
 * @throws IllegalArgumentException if a derived field name collides with an existing column
 */
private void addDerivedFieldsInSchema() {
  if (createHllIndex) {
    Collection<String> columnNames = dataSchema.getColumnNames();
    HllConfig hllConfig = config.getHllConfig();
    for (String derivedFieldName : hllConfig.getDerivedHllFieldToOriginMap().keySet()) {
      // Refuse to silently shadow an existing column. (The 'else' branch of the original was
      // redundant because 'throw' terminates the iteration.)
      if (columnNames.contains(derivedFieldName)) {
        throw new IllegalArgumentException(
            "Cannot add derived field: " + derivedFieldName + " since it already exists in schema.");
      }
      dataSchema.addField(new MetricFieldSpec(derivedFieldName, FieldSpec.DataType.STRING,
          hllConfig.getHllFieldSize(), MetricFieldSpec.DerivedMetricType.HLL));
    }
  }
}
/**
 * Builds a single-value {@link FieldSpec} for the given column from the generator spec:
 * DIMENSION and METRIC columns get empty specs populated via setters, TIME columns are
 * constructed directly with their time unit.
 *
 * @param genSpec generator spec providing per-column data types, field types and time units
 * @param column column name to build the spec for
 * @return the populated field spec
 * @throws IllegalArgumentException if the column's field type is not DIMENSION, METRIC or TIME
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
  DataType dataType = genSpec.getDataTypesMap().get(column);
  FieldType fieldType = genSpec.getFieldTypesMap().get(column);
  FieldSpec spec;
  switch (fieldType) {
    case DIMENSION:
      spec = new DimensionFieldSpec();
      break;
    case METRIC:
      spec = new MetricFieldSpec();
      break;
    case TIME:
      spec = new TimeFieldSpec(column, dataType, genSpec.getTimeUnitMap().get(column));
      break;
    default:
      // IllegalArgumentException (a RuntimeException, so backward compatible for callers)
      // with the offending value instead of a bare RuntimeException.
      throw new IllegalArgumentException("Invalid field type: " + fieldType + " for column: " + column);
  }
  spec.setName(column);
  spec.setDataType(dataType);
  spec.setSingleValueField(true);
  return spec;
}
/**
 * Builds the schema for the segment used by the aggregation tests: {@code NUM_METRIC_COLUMNS}
 * double-typed metric columns, recording each generated column name into {@code _columns}.
 *
 * @return the constructed schema
 */
private Schema buildSchema() {
  Schema result = new Schema();
  for (int index = 0; index < NUM_METRIC_COLUMNS; index++) {
    String columnName = METRIC_PREFIX + index;
    result.addField(new MetricFieldSpec(columnName, FieldSpec.DataType.DOUBLE));
    _columns[index] = columnName;
  }
  return result;
}
/**
 * Tests a derived {@link MetricFieldSpec} (HLL type, field size 10): accessor values, the
 * string default null value, and JSON round-trip equality.
 */
@Test
public void testDerivedMetricFieldSpec()
    throws Exception {
  MetricFieldSpec derivedMetricField =
      new MetricFieldSpec("derivedMetric", STRING, 10, MetricFieldSpec.DerivedMetricType.HLL);
  Assert.assertEquals(derivedMetricField.getFieldSize(), 10);
  Assert.assertTrue(derivedMetricField.isDerivedMetric());
  Assert.assertEquals(derivedMetricField.getDerivedMetricType(), MetricFieldSpec.DerivedMetricType.HLL);
  // Default null value for a derived STRING metric is the literal string "null".
  Assert.assertEquals(derivedMetricField.getDefaultNullValue(), "null");
  // Test serialize deserialize.
  MetricFieldSpec derivedMetricField2 =
      JsonUtils.stringToObject(derivedMetricField.toJsonObject().toString(), MetricFieldSpec.class);
  Assert.assertEquals(derivedMetricField2, derivedMetricField);
}
// NOTE(review): this method is truncated in the visible source — the body continues past
// what is shown here (no return statement or closing brace is visible), so only the visible
// prefix is documented.
private DataFetcher makeDataFetcher(long seed) {
  // Single-value metric specs, one per primitive column type under test.
  FieldSpec intSpec = new MetricFieldSpec(INT_COL_NAME, FieldSpec.DataType.INT);
  FieldSpec longSpec = new MetricFieldSpec(LONG_COL_NAME, FieldSpec.DataType.LONG);
  FieldSpec floatSpec = new MetricFieldSpec(FLOAT_COL_NAME, FieldSpec.DataType.FLOAT);
  FieldSpec doubleSpec = new MetricFieldSpec(DOUBLE_COL_NAME, FieldSpec.DataType.DOUBLE);
  // Seeded so the generated test data is reproducible.
  _random = new Random(seed);
/**
 * Verifies BYTES-typed metric fields: default null values (missing, empty, and non-empty)
 * survive JSON serialization and deserialization, and the round-tripped schema is equal to
 * the original (including hash code).
 */
@Test
public void testByteType()
    throws DecoderException, IOException {
  Schema expectedSchema = new Schema();
  byte[] expectedEmptyDefault = new byte[0];
  byte[] expectedNonEmptyDefault = Hex.decodeHex("abcd1234".toCharArray());
  expectedSchema.setSchemaName("test");
  expectedSchema.addField(new MetricFieldSpec("noDefault", FieldSpec.DataType.BYTES));
  expectedSchema.addField(new MetricFieldSpec("emptyDefault", FieldSpec.DataType.BYTES, expectedEmptyDefault));
  expectedSchema.addField(new MetricFieldSpec("nonEmptyDefault", FieldSpec.DataType.BYTES, expectedNonEmptyDefault));

  // Ensure that schema can be serialized and de-serialized (ie byte[] converted to String and back).
  String jsonSchema = expectedSchema.getJSONSchema();
  Schema actualSchema = Schema.fromString(jsonSchema);

  // A BYTES field with no explicit default comes back with the empty byte array as default.
  Assert.assertEquals(actualSchema.getFieldSpecFor("noDefault").getDefaultNullValue(), expectedEmptyDefault);
  Assert.assertEquals(actualSchema.getFieldSpecFor("emptyDefault").getDefaultNullValue(), expectedEmptyDefault);
  Assert.assertEquals(actualSchema.getFieldSpecFor("nonEmptyDefault").getDefaultNullValue(), expectedNonEmptyDefault);
  Assert.assertEquals(actualSchema, expectedSchema);
  Assert.assertEquals(actualSchema.hashCode(), expectedSchema.hashCode());
}
// Closing brace of the enclosing class (its opening brace is outside this chunk).
}
/**
 * Creates a minimal dummy schema named after the given table: two single-value string
 * dimensions (with empty-string and 0 defaults) plus an int and a double metric (with
 * 0 and -1 defaults).
 *
 * @param tableName name to assign to the schema
 * @return the dummy schema
 */
protected Schema createDummySchema(String tableName) {
  Schema dummySchema = new Schema();
  dummySchema.setSchemaName(tableName);
  dummySchema.addField(new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true, ""));
  dummySchema.addField(new DimensionFieldSpec("dimB", FieldSpec.DataType.STRING, true, 0));
  dummySchema.addField(new MetricFieldSpec("metricA", FieldSpec.DataType.INT, 0));
  dummySchema.addField(new MetricFieldSpec("metricB", FieldSpec.DataType.DOUBLE, -1));
  return dummySchema;
}
/**
 * Generates an Avro file containing TDigest BYTES data and tests segment generation:
 * the fixed-width column gets a dictionary while the variable-width one does not, and every
 * row read back from the built segment matches the values written to the Avro file.
 */
@Test
public void testTDigestAvro()
    throws Exception {
  Schema schema = new Schema();
  schema.addField(new MetricFieldSpec(FIXED_BYTES_UNSORTED_COLUMN, FieldSpec.DataType.BYTES));
  schema.addField(new MetricFieldSpec(VARIABLE_BYTES_COLUMN, FieldSpec.DataType.BYTES));

  // Expected values captured while building the Avro input. (Renamed: '_' prefix is reserved
  // for fields, these are locals.)
  List<byte[]> fixedExpected = new ArrayList<>(NUM_ROWS);
  List<byte[]> varExpected = new ArrayList<>(NUM_ROWS);
  buildAvro(schema, fixedExpected, varExpected);

  IndexSegment segment = buildSegmentFromAvro(schema, AVRO_DIR_NAME, AVRO_NAME, SEGMENT_NAME);
  SegmentMetadata metadata = segment.getSegmentMetadata();
  Assert.assertTrue(metadata.hasDictionary(FIXED_BYTES_UNSORTED_COLUMN));
  Assert.assertFalse(metadata.hasDictionary(VARIABLE_BYTES_COLUMN));

  PinotSegmentRecordReader reader = new PinotSegmentRecordReader(new File(AVRO_DIR_NAME, SEGMENT_NAME));
  // Close the reader even when an assertion fails mid-loop (the original leaked it).
  try {
    GenericRow row = new GenericRow();
    int i = 0;
    while (reader.hasNext()) {
      row = reader.next(row);
      Assert.assertEquals(
          ByteArray.compare((byte[]) row.getValue(FIXED_BYTES_UNSORTED_COLUMN), fixedExpected.get(i)), 0);
      Assert.assertEquals(ByteArray.compare((byte[]) row.getValue(VARIABLE_BYTES_COLUMN), varExpected.get(i++)), 0);
    }
  } finally {
    reader.close();
  }
  segment.destroy();
}
/**
 * Builds a test Pinot schema named "schema" containing two single-value string dimensions,
 * an int metric, a float metric, and the supplied time field.
 *
 * @param timeSpec time field to include in the schema
 * @return the constructed schema
 */
private Schema createPinotSchemaWithTimeSpec(TimeFieldSpec timeSpec) {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");
  pinotSchema.addField(new DimensionFieldSpec(D1, DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D2, DataType.STRING, true));
  pinotSchema.addField(new MetricFieldSpec(M1, DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, DataType.FLOAT));
  pinotSchema.addField(timeSpec);
  return pinotSchema;
}
// Fragment of a larger method (the enclosing definition is not visible in this chunk):
// registers a double metric, a BYTES metric (named for TDigest data), and a single-value
// string group-by dimension on the schema.
schema.addField(new MetricFieldSpec(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE));
schema.addField(new MetricFieldSpec(TDIGEST_COLUMN, FieldSpec.DataType.BYTES));
schema.addField(new DimensionFieldSpec(GROUP_BY_COLUMN, FieldSpec.DataType.STRING, true));
        continue;
      // Fragment of a switch inside a larger loop (enclosing definition not visible in this
      // chunk): METRIC-typed columns are added to the schema as MetricFieldSpec entries.
      case METRIC:
        final FieldSpec metricFieldSpec = new MetricFieldSpec(columnName, getColumnType(field));
        schema.addField(metricFieldSpec);
        continue;
/**
 * Extracts a Pinot schema from an Avro file without any time-column handling. Fields whose
 * Avro type cannot be converted are logged and skipped. A field becomes a metric when its
 * "pinotType" property equals "METRIC", otherwise a dimension.
 *
 * @param avroFile Avro data file to read the schema from
 * @return the extracted Pinot schema
 * @throws IOException on failure to open or read the Avro file
 */
public static Schema extractSchemaFromAvroWithoutTime(File avroFile)
    throws IOException {
  // try-with-resources: the original leaked the stream if an exception escaped the loop.
  try (DataFileStream<GenericRecord> dataStream =
      new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>())) {
    Schema schema = new Schema();
    for (final Field field : dataStream.getSchema().getFields()) {
      // Probe convertibility first; unconvertible fields are skipped with a warning.
      try {
        getColumnType(field);
      } catch (Exception e) {
        LOGGER.warn("Caught exception while converting Avro field {} of type {}, field will not be in schema.",
            field.name(), field.schema().getType());
        continue;
      }
      final String columnName = field.name();
      // "METRIC".equals(...) is null-safe, so no separate null check on the property is needed.
      final FieldSpec fieldSpec =
          "METRIC".equals(field.getProp("pinotType")) ? new MetricFieldSpec() : new DimensionFieldSpec();
      fieldSpec.setName(columnName);
      // Use the field we already have instead of re-resolving it by name from the Avro schema.
      fieldSpec.setDataType(getColumnType(field));
      fieldSpec.setSingleValueField(isSingleValueField(field));
      schema.addField(fieldSpec);
    }
    return schema;
  }
}
/**
 * Builds a test Pinot schema named "schema": a single-value string dimension, a multi-value
 * string dimension, an int metric, a float metric, and an hour-granularity long time column.
 *
 * @return the constructed schema
 */
private Schema createPinotSchema() {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");
  // FieldSpec.DataType used uniformly (the original mixed the imported and qualified forms).
  pinotSchema.addField(new DimensionFieldSpec(D_SV_1, FieldSpec.DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D_MV_1, FieldSpec.DataType.STRING, false));
  pinotSchema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.FLOAT));
  pinotSchema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.HOURS, TIME)));
  return pinotSchema;
}
// Fragment of a larger schema-building method (enclosing definition not visible in this
// chunk): two single-value dimensions, a long and a double metric, and a millisecond-
// granularity long time column.
schema.addField(new DimensionFieldSpec(D1, FieldSpec.DataType.INT, true));
schema.addField(new DimensionFieldSpec(D2, FieldSpec.DataType.STRING, true));
schema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.LONG));
schema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.DOUBLE));
schema.addField(new TimeFieldSpec(T, FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS));
/**
 * Builds a test Pinot schema named "schema": a single-value string dimension, a single-value
 * int dimension, a multi-value string dimension, an int metric, a float metric, and an
 * hour-granularity long time column.
 *
 * @return the constructed schema
 */
private Schema createPinotSchema() {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");
  pinotSchema.addField(new DimensionFieldSpec(D_SV_1, FieldSpec.DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D_SV_2, FieldSpec.DataType.INT, true));
  pinotSchema.addField(new DimensionFieldSpec(D_MV_1, FieldSpec.DataType.STRING, false));
  pinotSchema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.FLOAT));
  pinotSchema.addField(new TimeFieldSpec(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.HOURS, TIME)));
  return pinotSchema;
}
// Fragment of a larger schema-building method (enclosing definition not visible in this
// chunk): two single-value dimensions, an int metric, and a millisecond-granularity long
// time column.
schema.addField(new DimensionFieldSpec(D1, FieldSpec.DataType.INT, true));
schema.addField(new DimensionFieldSpec(D2, FieldSpec.DataType.STRING, true));
schema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
schema.addField(new TimeFieldSpec(T, FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS));