protected Schema createDummySchema(String tableName) {
  Schema schema = new Schema();
  schema.setSchemaName(tableName);
  schema.addField(new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true, ""));
  schema.addField(new DimensionFieldSpec("dimB", FieldSpec.DataType.STRING, true, 0));
  schema.addField(new MetricFieldSpec("metricA", FieldSpec.DataType.INT, 0));
  schema.addField(new MetricFieldSpec("metricB", FieldSpec.DataType.DOUBLE, -1));
  return schema;
}
public MeetupRsvpStream(File schemaFile)
    throws IOException, URISyntaxException {
  schema = Schema.fromFile(schemaFile);

  Properties properties = new Properties();
  properties.put("metadata.broker.list", KafkaStarterUtils.DEFAULT_KAFKA_BROKER);
  properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
  properties.put("request.required.acks", "1");

  ProducerConfig producerConfig = new ProducerConfig(properties);
  producer = new Producer<String, byte[]>(producerConfig);
}
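// A minimal usage sketch, not from the source: once constructed, the old-style Kafka
// producer can publish serialized events via KeyedMessage. The topic name
// "meetupRSVPEvents" and the payload below are illustrative assumptions.
public void publishSampleEvent() throws UnsupportedEncodingException {
  String message = "{\"venue_name\": \"Some venue\"}";  // hypothetical payload
  producer.send(new KeyedMessage<String, byte[]>("meetupRSVPEvents", message.getBytes("UTF-8")));
}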
/**
 * Fetch {@link Schema} from a {@link ZNRecord}.
 */
public static Schema fromZNRecord(@Nonnull ZNRecord record)
    throws IOException {
  String schemaJSON = record.getSimpleField("schemaJSON");
  return Schema.fromString(schemaJSON);
}
/**
 * Compare two schemas, ignoring their version number.
 *
 * @return <code>true</code> if the two schemas are equal to each other,
 *         <code>false</code> otherwise.
 */
public static boolean equalsIgnoreVersion(@Nonnull Schema schema1, @Nonnull Schema schema2) {
  Preconditions.checkNotNull(schema1);
  Preconditions.checkNotNull(schema2);
  return schema1.getSchemaName().equals(schema2.getSchemaName())
      && schema1.getFieldSpecMap().equals(schema2.getFieldSpecMap());
}
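// A minimal sketch (assumed test, not from the source): two schemas with the same name
// and field specs compare equal under equalsIgnoreVersion, since only the schema name
// and the field-spec map are inspected.
@Test
public void testEqualsIgnoreVersionSketch() {
  Schema schema1 = new Schema();
  schema1.setSchemaName("mySchema");
  schema1.addField(new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true));

  Schema schema2 = new Schema();
  schema2.setSchemaName("mySchema");
  schema2.addField(new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true));

  Assert.assertTrue(SchemaUtils.equalsIgnoreVersion(schema1, schema2));
}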
/**
 * Returns a new schema based on the original one. The new schema removes columns as needed
 * (e.g. virtual columns) and adds the new time spec to the schema.
 */
@VisibleForTesting
public Schema getUpdatedSchema(Schema original) {
  TimeFieldSpec tfs = original.getTimeFieldSpec();
  // Use the outgoing granularity for creating the segment
  TimeGranularitySpec outgoing = tfs.getOutgoingGranularitySpec();
  TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing);

  Schema newSchema = new Schema();
  newSchema.addField(newTimeSpec);
  for (String col : original.getPhysicalColumnNames()) {
    if (!col.equals(tfs.getName())) {
      newSchema.addField(original.getFieldSpecFor(col));
    }
  }
  return newSchema;
}
@Test
public void testByteType()
    throws DecoderException, IOException {
  Schema expectedSchema = new Schema();
  byte[] expectedEmptyDefault = new byte[0];
  byte[] expectedNonEmptyDefault = Hex.decodeHex("abcd1234".toCharArray());

  expectedSchema.setSchemaName("test");
  expectedSchema.addField(new MetricFieldSpec("noDefault", FieldSpec.DataType.BYTES));
  expectedSchema.addField(new MetricFieldSpec("emptyDefault", FieldSpec.DataType.BYTES, expectedEmptyDefault));
  expectedSchema.addField(new MetricFieldSpec("nonEmptyDefault", FieldSpec.DataType.BYTES, expectedNonEmptyDefault));

  // Ensure that the schema can be serialized and de-serialized (i.e. byte[] converted to String and back).
  String jsonSchema = expectedSchema.getJSONSchema();
  Schema actualSchema = Schema.fromString(jsonSchema);

  Assert.assertEquals(actualSchema.getFieldSpecFor("noDefault").getDefaultNullValue(), expectedEmptyDefault);
  Assert.assertEquals(actualSchema.getFieldSpecFor("emptyDefault").getDefaultNullValue(), expectedEmptyDefault);
  Assert.assertEquals(actualSchema.getFieldSpecFor("nonEmptyDefault").getDefaultNullValue(), expectedNonEmptyDefault);
  Assert.assertEquals(actualSchema, expectedSchema);
  Assert.assertEquals(actualSchema.hashCode(), expectedSchema.hashCode());
}
private void setupRealtimeTable(String table)
    throws Exception {
  _offlineTableConfig = null;
  File schemaFile = getSchemaFile();
  Schema schema = Schema.fromFile(schemaFile);
  String schemaName = schema.getSchemaName();
  addSchema(schemaFile, schemaName);

  String timeColumnName = schema.getTimeColumnName();
  Assert.assertNotNull(timeColumnName);
  TimeUnit outgoingTimeUnit = schema.getOutgoingTimeUnit();
  Assert.assertNotNull(outgoingTimeUnit);
  String timeType = outgoingTimeUnit.toString();

  addRealtimeTable(table, useLlc(), KafkaStarterUtils.DEFAULT_KAFKA_BROKER, KafkaStarterUtils.DEFAULT_ZK_STR,
      getKafkaTopic(), getRealtimeSegmentFlushSize(), null, timeColumnName, timeType, schemaName, null, null,
      getLoadMode(), getSortedColumn(), getInvertedIndexColumns(), getBloomFilterIndexColumns(), getRawIndexColumns(),
      getTaskConfig(), getStreamConsumerFactoryClassName());
  completeTableConfiguration();
}
Schema schema = createDummySchema(schemaName);
String url = _controllerRequestURLBuilder.forSchemaCreate();
PostMethod postMethod = sendMultipartPostRequest(url, schema.toString());
Assert.assertEquals(postMethod.getStatusCode(), 200);

// Adding a new column and re-posting the schema should also succeed.
schema.addField(new DimensionFieldSpec("NewColumn", FieldSpec.DataType.STRING, true));
postMethod = sendMultipartPostRequest(url, schema.toString());
Assert.assertEquals(postMethod.getStatusCode(), 200);

// Fetch the schema back from the controller and verify it matches the input
// (the fetch helper and URL-builder method below are assumed from context).
String schemaStr = sendGetRequest(_controllerRequestURLBuilder.forSchemaGet(schemaName));
Schema readSchema = Schema.fromString(schemaStr);
Schema inputSchema = Schema.fromString(schema.toString());
Assert.assertEquals(readSchema, inputSchema);
Assert.assertTrue(readSchema.getFieldSpecMap().containsKey("NewColumn"));

String yetAnotherColumn = "YetAnotherColumn";
Assert.assertFalse(readSchema.getFieldSpecMap().containsKey(yetAnotherColumn));

// Update the schema with another new column via PUT and verify again.
schema.addField(new DimensionFieldSpec(yetAnotherColumn, FieldSpec.DataType.STRING, true));
PutMethod putMethod =
    sendMultipartPutRequest(_controllerRequestURLBuilder.forSchemaUpdate(schemaName), schema.toString());
Assert.assertEquals(putMethod.getStatusCode(), 200);

schemaStr = sendGetRequest(_controllerRequestURLBuilder.forSchemaGet(schemaName));
readSchema = Schema.fromString(schemaStr);
inputSchema = Schema.fromString(schema.toString());
Assert.assertEquals(readSchema, inputSchema);
Assert.assertTrue(readSchema.getFieldSpecMap().containsKey(yetAnotherColumn));

// Updating with a truncated (invalid) schema string should be rejected.
putMethod = sendMultipartPutRequest(_controllerRequestURLBuilder.forSchemaUpdate(schemaName),
    schema.toString().substring(1));

// Updating under a name that differs from the schema's own name should also be rejected.
schema.setSchemaName("differentSchemaName");
putMethod = sendMultipartPutRequest(_controllerRequestURLBuilder.forSchemaUpdate(schemaName), schema.toString());
@Test
public void testNoVirtualColumnsInSchema() {
  Schema schema = new Schema();
  FieldSpec spec = new DimensionFieldSpec("col1", FieldSpec.DataType.STRING, true);
  schema.addField(spec);
  TimeFieldSpec tfs = new TimeFieldSpec("col1", FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS,
      "col2", FieldSpec.DataType.LONG, TimeUnit.DAYS);
  schema.addField(tfs);
  VirtualColumnProviderFactory.addBuiltInVirtualColumnsToSchema(schema);
  Assert.assertEquals(schema.getColumnNames().size(), 5);
  Assert.assertEquals(schema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.MILLISECONDS);

  RealtimeSegmentConverter converter =
      new RealtimeSegmentConverter(null, "", schema, "testTable", "col1", "segment1", "col1");
  Schema newSchema = converter.getUpdatedSchema(schema);
  Assert.assertEquals(newSchema.getColumnNames().size(), 2);
  Assert.assertEquals(newSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.DAYS);
}
@Test
public void testSerializeDeserialize()
    throws Exception {
  URL resourceUrl = getClass().getClassLoader().getResource("schemaTest.schema");
  Assert.assertNotNull(resourceUrl);
  Schema schema = Schema.fromFile(new File(resourceUrl.getFile()));

  Schema schemaToCompare = Schema.fromString(schema.getJSONSchema());
  Assert.assertEquals(schemaToCompare, schema);
  Assert.assertEquals(schemaToCompare.hashCode(), schema.hashCode());

  schemaToCompare = SchemaUtils.fromZNRecord(SchemaUtils.toZNRecord(schema));
  Assert.assertEquals(schemaToCompare, schema);
  Assert.assertEquals(schemaToCompare.hashCode(), schema.hashCode());

  // When setting new fields, the schema string should be updated.
  String jsonSchema = schemaToCompare.getJSONSchema();
  schemaToCompare.setSchemaName("newSchema");
  String jsonSchemaToCompare = schemaToCompare.getJSONSchema();
  Assert.assertFalse(jsonSchema.equals(jsonSchemaToCompare));
}
/**
 * Required by JSON deserializer. DO NOT USE. DO NOT REMOVE.
 * Adding @Deprecated to prevent usage.
 *
 * @param dateTimeFieldSpecs
 */
@Deprecated
public void setDateTimeFieldSpecs(@Nonnull List<DateTimeFieldSpec> dateTimeFieldSpecs) {
  Preconditions.checkState(_dateTimeFieldSpecs.isEmpty());
  for (DateTimeFieldSpec dateTimeFieldSpec : dateTimeFieldSpecs) {
    addField(dateTimeFieldSpec);
  }
}
public void createInvertedIndexForAllColumns() {
  if (_schema == null) {
    LOGGER.warn("Schema has not been set, will not create inverted index for all columns.");
    return;
  }
  for (FieldSpec spec : _schema.getAllFieldSpecs()) {
    _invertedIndexCreationColumns.add(spec.getName());
  }
}
/**
 * @deprecated Load the schema outside the class and use {@link #setSchema(Schema)} instead.
 * @throws IOException
 */
@Deprecated
public void loadConfigFiles()
    throws IOException {
  Schema schema;
  if (_schemaFile != null) {
    schema = Schema.fromFile(new File(_schemaFile));
    setSchema(schema);
  } else if (_format == FileFormat.AVRO) {
    schema = AvroUtils.getPinotSchemaFromAvroDataFile(new File(_inputFilePath));
    setSchema(schema);
  } else {
    throw new RuntimeException("Input format " + _format + " requires schema.");
  }
  setTimeColumnName(schema.getTimeColumnName());
  TimeFieldSpec timeFieldSpec = schema.getTimeFieldSpec();
  if (timeFieldSpec != null) {
    setSegmentTimeUnit(timeFieldSpec.getIncomingGranularitySpec().getTimeType());
  } else {
    setSegmentTimeUnit(TimeUnit.DAYS);
  }
  if (_readerConfigFile != null) {
    setReaderConfig(JsonUtils.fileToObject(new File(_readerConfigFile), CSVRecordReaderConfig.class));
  }
}
private static void printSchema(Schema schema) {
  LOGGER.info("schemaName: {}", schema.getSchemaName());
  LOGGER.info("Dimension columnNames: ");
  int i = 0;
  for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) {
    String columnInfo = i + " " + spec.getName();
    if (!spec.isSingleValueField()) {
      LOGGER.info(columnInfo + " Multi-Value.");
    } else {
      LOGGER.info(columnInfo);
    }
    i += 1;
  }
  LOGGER.info("Metric columnNames: ");
  i = 0;
  for (MetricFieldSpec spec : schema.getMetricFieldSpecs()) {
    String columnInfo = i + " " + spec.getName();
    if (!spec.isSingleValueField()) {
      LOGGER.info(columnInfo + " Multi-Value.");
    } else {
      LOGGER.info(columnInfo);
    }
    i += 1;
  }
  LOGGER.info("Time column: {}", schema.getTimeColumnName());
}
private void setupRealtimeTable()
    throws IOException {
  // Set up the realtime table.
  Map<String, String> streamConfigs = new HashMap<>();
  streamConfigs.put("streamType", "kafka");
  streamConfigs.put("stream.kafka.consumer.type", "highLevel");
  streamConfigs.put("stream.kafka.topic.name", "kafkaTopic");
  streamConfigs.put("stream.kafka.decoder.class.name",
      "org.apache.pinot.core.realtime.impl.kafka.KafkaAvroMessageDecoder");
  streamConfigs.put("stream.kafka.hlc.zk.connect.string", "localhost:1111/zkConnect");
  streamConfigs.put("stream.kafka.decoder.prop.schema.registry.rest.url", "http://localhost:2222/schemaRegistry");
  TableConfig realtimeTableConfig = new TableConfig.Builder(CommonConstants.Helix.TableType.REALTIME)
      .setTableName(RAW_DINING_TABLE_NAME)
      .setTimeColumnName("timeColumn")
      .setTimeType("DAYS")
      .setStreamConfigs(streamConfigs)
      .build();

  Schema schema = new Schema();
  schema.setSchemaName(RAW_DINING_TABLE_NAME);
  _pinotResourceManager.addOrUpdateSchema(schema);

  // Fake a PinotLLCRealtimeSegmentManager instance: required for realtime table creation.
  PinotLLCRealtimeSegmentManager.create(_pinotResourceManager, new ControllerConf(),
      new ControllerMetrics(new MetricsRegistry()));

  _pinotResourceManager.addTable(realtimeTableConfig);
  _helixBrokerStarter.getHelixExternalViewBasedRouting()
      .markDataResourceOnline(realtimeTableConfig, null, new ArrayList<InstanceConfig>());
}
@Nullable
public FieldSpec getFieldSpecForColumn(String column) {
  return _schema.getFieldSpecFor(column);
}
/**
 * Complete the stats gathering process and store the stats information in indexCreationInfoMap.
 */
void buildIndexCreationInfo()
    throws Exception {
  for (FieldSpec spec : dataSchema.getAllFieldSpecs()) {
    String column = spec.getName();
    // Skip adding virtual columns, so that they don't get an on-disk representation
    if (dataSchema.isVirtualColumn(column)) {
      continue;
    }
    ColumnStatistics columnProfile = segmentStats.getColumnProfileFor(column);
    indexCreationInfoMap.put(column,
        new ColumnIndexCreationInfo(columnProfile, true/*createDictionary*/, ForwardIndexType.FIXED_BIT_COMPRESSED,
            InvertedIndexType.ROARING_BITMAPS, false/*isAutoGenerated*/,
            dataSchema.getFieldSpecFor(column).getDefaultNullValue()));
  }
  segmentIndexCreationInfo.setTotalDocs(totalDocs);
  segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs);
  segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs);
  segmentIndexCreationInfo.setStarTreeEnabled(createStarTree);
}
@Override
public boolean execute()
    throws Exception {
  if (_controllerHost == null) {
    _controllerHost = NetUtil.getHostAddress();
  }

  if (!_exec) {
    LOGGER.warn("Dry running command: " + toString());
    LOGGER.warn("Use the -exec option to actually execute the command.");
    return true;
  }

  File schemaFile = new File(_schemaFile);
  LOGGER.info("Executing command: " + toString());
  if (!schemaFile.exists()) {
    throw new FileNotFoundException("File does not exist: " + _schemaFile);
  }

  Schema schema = Schema.fromFile(schemaFile);
  try (FileUploadDownloadClient fileUploadDownloadClient = new FileUploadDownloadClient()) {
    fileUploadDownloadClient.addSchema(
        FileUploadDownloadClient.getUploadSchemaHttpURI(_controllerHost, Integer.parseInt(_controllerPort)),
        schema.getSchemaName(), schemaFile);
  }
  return true;
}
/**
 * Wrap {@link Schema} into a {@link ZNRecord}.
 */
public static ZNRecord toZNRecord(@Nonnull Schema schema) {
  ZNRecord record = new ZNRecord(schema.getSchemaName());
  record.setSimpleField("schemaJSON", schema.getJSONSchema());
  return record;
}
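// A minimal sketch (assumed, not from the source): the ZNRecord produced by toZNRecord
// is keyed by the schema name and stores the full schema JSON under the "schemaJSON"
// simple field, which is exactly what fromZNRecord (shown earlier) reads back.
@Test
public void testToZNRecordSketch() {
  Schema schema = new Schema();
  schema.setSchemaName("mySchema");
  schema.addField(new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true));

  ZNRecord record = SchemaUtils.toZNRecord(schema);
  Assert.assertEquals(record.getId(), "mySchema");
  Assert.assertEquals(record.getSimpleField("schemaJSON"), schema.getJSONSchema());
}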