/**
 * Builds a column data source for a REALTIME segment.
 *
 * <p>The column name, data type and single-value flag are read from {@code fieldSpec}; everything is then handed to
 * the full constructor. The fourth delegated argument is fixed to {@code false} and the cardinality is passed as
 * {@code Constants.UNKNOWN_CARDINALITY}.
 *
 * <p>NOTE(review): the fixed {@code false} is presumably the "is sorted" flag -- confirm against the delegated
 * constructor.
 */
public ColumnDataSource(
    FieldSpec fieldSpec,
    int numDocs,
    int maxNumMultiValues,
    DataFileReader forwardIndex,
    InvertedIndexReader invertedIndex,
    MutableDictionary dictionary,
    BloomFilterReader bloomFilter) {
  this(
      fieldSpec.getName(),
      fieldSpec.getDataType(),
      fieldSpec.isSingleValueField(),
      false,
      numDocs,
      maxNumMultiValues,
      forwardIndex,
      invertedIndex,
      dictionary,
      bloomFilter,
      Constants.UNKNOWN_CARDINALITY);
}
/**
 * Stores the incoming time granularity spec.
 *
 * <p>If no outgoing granularity spec is present, the name and data type of the incoming spec are also pushed to the
 * superclass via {@code super.setName} / {@code super.setDataType}.
 *
 * @param incomingGranularitySpec the incoming granularity spec to store
 */
public void setIncomingGranularitySpec(@Nonnull TimeGranularitySpec incomingGranularitySpec) {
  _incomingGranularitySpec = incomingGranularitySpec;

  if (_outgoingGranularitySpec != null) {
    // An outgoing spec exists: leave the superclass name and data type untouched.
    return;
  }

  super.setName(incomingGranularitySpec.getName());
  super.setDataType(incomingGranularitySpec.getDataType());
}
/**
 * Extracts the value for a field from a JSON node.
 *
 * <ul>
 *   <li>Single-value field: the extracted single value, or the field's default null value when the node is
 *       {@code null} or a JSON null.</li>
 *   <li>Multi-value field: always an {@code Object[]}. A missing/JSON-null node or an empty JSON array yields a
 *       one-element array holding the default null value; a non-array node yields a one-element array holding the
 *       extracted value; a non-empty array yields one extracted value per element.</li>
 * </ul>
 *
 * @param jsonValue the JSON node to read, may be {@code null}
 * @param fieldSpec the field spec providing the data type, single-value flag and default null value
 * @return the extracted value, or an {@code Object[]} for multi-value fields
 */
public static Object extractValue(@Nullable JsonNode jsonValue, FieldSpec fieldSpec) {
  boolean singleValue = fieldSpec.isSingleValueField();
  boolean missing = jsonValue == null || jsonValue.isNull();

  if (singleValue) {
    if (missing) {
      return fieldSpec.getDefaultNullValue();
    }
    return extractSingleValue(jsonValue, fieldSpec.getDataType());
  }

  // Multi-value field from here on: the result is always an Object[].
  if (missing) {
    return new Object[]{fieldSpec.getDefaultNullValue()};
  }
  if (!jsonValue.isArray()) {
    // A scalar node is wrapped into a one-element array.
    return new Object[]{extractSingleValue(jsonValue, fieldSpec.getDataType())};
  }

  int length = jsonValue.size();
  if (length == 0) {
    // An empty array is represented by a single default null value.
    return new Object[]{fieldSpec.getDefaultNullValue()};
  }

  Object[] extracted = new Object[length];
  for (int index = 0; index < length; index++) {
    extracted[index] = extractSingleValue(jsonValue.get(index), fieldSpec.getDataType());
  }
  return extracted;
}
@Override public void postInject() { // Compute the actual default null value from its string representation _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue); }
/**
 * Updates the default null value of this field spec.
 *
 * <p>A non-null argument replaces the stored string representation. The actual default null value is then
 * recomputed from that string representation, but only once the data type is known.
 *
 * @param defaultNullValue the new default null value; {@code null} keeps the current string representation
 */
public void setDefaultNullValue(@Nullable Object defaultNullValue) {
  if (defaultNullValue != null) {
    _stringDefaultNullValue = getStringValue(defaultNullValue);
  }

  if (_dataType == null) {
    // Without a data type the actual value cannot be computed yet.
    return;
  }
  _defaultNullValue = getDefaultNullValue(getFieldType(), _dataType, _stringDefaultNullValue);
}
private void addInvertedIndex(int docId, Map<String, Object> dictIdMap) { // Update inverted index at last // NOTE: inverted index have to be updated at last because once it gets updated, the latest record will become // queryable for (FieldSpec fieldSpec : _schema.getAllFieldSpecs()) { String column = fieldSpec.getName(); RealtimeInvertedIndexReader invertedIndex = _invertedIndexMap.get(column); if (invertedIndex != null) { if (fieldSpec.isSingleValueField()) { invertedIndex.add(((Integer) dictIdMap.get(column)), docId); } else { int[] dictIds = (int[]) dictIdMap.get(column); for (int dictId : dictIds) { invertedIndex.add(dictId, docId); } } } } }
String column = spec.getName(); switch (spec.getDataType()) { case BOOLEAN: case STRING: columnStatsCollectorMap .put(spec.getName(), new StringColumnPreIndexStatsCollector(column, _statsCollectorConfig)); break; case INT: columnStatsCollectorMap .put(spec.getName(), new IntColumnPreIndexStatsCollector(column, _statsCollectorConfig)); break; case LONG: columnStatsCollectorMap .put(spec.getName(), new LongColumnPreIndexStatsCollector(column, _statsCollectorConfig)); break; case FLOAT: columnStatsCollectorMap .put(spec.getName(), new FloatColumnPreIndexStatsCollector(column, _statsCollectorConfig)); break; case DOUBLE: columnStatsCollectorMap .put(spec.getName(), new DoubleColumnPreIndexStatsCollector(column, _statsCollectorConfig)); break; case BYTES: columnStatsCollectorMap .put(spec.getName(), new BytesColumnPredIndexStatsCollector(column, _statsCollectorConfig)); break; default:
fieldSpec1.setName("svDimension"); fieldSpec1.setDataType(BOOLEAN); fieldSpec1.setDefaultNullValue(false); FieldSpec fieldSpec2 = new DimensionFieldSpec("svDimension", BOOLEAN, true, false); Assert.assertEquals(fieldSpec1, fieldSpec2); Assert.assertEquals(fieldSpec1.toString(), fieldSpec2.toString()); Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode()); Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "false"); fieldSpec1.setName("mvDimension"); fieldSpec1.setDataType(INT); fieldSpec1.setSingleValueField(false); fieldSpec2 = new DimensionFieldSpec("mvDimension", INT, false); Assert.assertEquals(fieldSpec1, fieldSpec2); Assert.assertEquals(fieldSpec1.toString(), fieldSpec2.toString()); Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode()); Assert.assertEquals(fieldSpec1.getDefaultNullValue(), Integer.MIN_VALUE); fieldSpec1.setName("mvDimension"); fieldSpec1.setDataType(FLOAT); fieldSpec1.setSingleValueField(false); fieldSpec1.setDefaultNullValue(-0.1); fieldSpec2 = new DimensionFieldSpec("mvDimension", FLOAT, false, -0.1); Assert.assertEquals(fieldSpec1, fieldSpec2); Assert.assertEquals(fieldSpec1.toString(), fieldSpec2.toString()); Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode()); Assert.assertEquals(fieldSpec1.getDefaultNullValue(), -0.1F); fieldSpec1.setName("metric");
FieldSpec fieldSpecInSchema = _schema.getFieldSpecFor(column); Preconditions.checkNotNull(fieldSpecInSchema); FieldSpec.FieldType fieldTypeInSchema = fieldSpecInSchema.getFieldType(); ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); FieldSpec.DataType dataTypeInSchema = fieldSpecInSchema.getDataType(); boolean isSingleValueInMetadata = columnMetadata.isSingleValue(); boolean isSingleValueInSchema = fieldSpecInSchema.isSingleValueField(); String defaultValueInMetadata = columnMetadata.getDefaultNullValueString(); String defaultValueInSchema = fieldSpecInSchema.getDefaultNullValue().toString(); if (dataTypeInMetadata != dataTypeInSchema || isSingleValueInMetadata != isSingleValueInSchema || !defaultValueInSchema.equals(defaultValueInMetadata)) {
final int totalRawDocs = _segmentMetadata.getTotalRawDocs(); final int totalAggDocs = totalDocs - totalRawDocs; final FieldSpec.DataType dataType = fieldSpec.getDataType(); final Object defaultValue = fieldSpec.getDefaultNullValue(); final boolean isSingleValue = fieldSpec.isSingleValueField(); final int maxNumberOfMultiValueElements = isSingleValue ? 0 : 1; int dictionaryElementSize = 0; new SingleValueSortedForwardIndexCreator(_indexDir, fieldSpec.getName(), 1/*cardinality*/); for (int docId = 0; docId < totalDocs; docId++) { svFwdIndexCreator.index(docId, 0/*dictionaryId*/); new MultiValueUnsortedForwardIndexCreator(_indexDir, fieldSpec.getName(), 1/*cardinality*/, totalDocs/*numDocs*/, totalDocs/*totalNumberOfValues*/); int[] dictIds = {0};
/**
 * Marks every column of the schema for inverted index creation.
 *
 * <p>If the schema has not been set, a warning is logged and nothing is added.
 */
public void createInvertedIndexForAllColumns() {
  if (_schema != null) {
    for (FieldSpec fieldSpec : _schema.getAllFieldSpecs()) {
      String columnName = fieldSpec.getName();
      _invertedIndexCreationColumns.add(columnName);
    }
  } else {
    LOGGER.warn("Schema has not been set, will not create inverted index for all columns.");
  }
}
/**
 * Copies every schema field from an Avro record into the given row.
 *
 * <p>For the TIME field the incoming time field spec is used instead of the schema's field spec; the value is
 * read from, and stored under, that spec's name. Single-value fields are converted with
 * {@code AvroUtils.transformAvroValueToObject}, multi-value fields with
 * {@code AvroUtils.transformAvroArrayToObjectArray}.
 *
 * @param from the Avro record to read from
 * @param to the row to populate
 * @return {@code to}, after population
 */
@Nonnull
public GenericRow transform(@Nonnull GenericData.Record from, @Nonnull GenericRow to) {
  for (FieldSpec schemaFieldSpec : _schema.getAllFieldSpecs()) {
    FieldSpec sourceSpec;
    if (schemaFieldSpec.getFieldType() == FieldSpec.FieldType.TIME) {
      sourceSpec = _incomingTimeFieldSpec;
    } else {
      sourceSpec = schemaFieldSpec;
    }

    String name = sourceSpec.getName();
    Object rawValue = from.get(name);

    Object converted;
    if (sourceSpec.isSingleValueField()) {
      converted = AvroUtils.transformAvroValueToObject(rawValue, sourceSpec);
    } else {
      converted = AvroUtils.transformAvroArrayToObjectArray((Array) rawValue, sourceSpec);
    }
    to.putField(name, converted);
  }
  return to;
}
}
FieldType fieldType = fieldSpec.getFieldType(); DataType dataType = fieldSpec.getDataType(); String fieldName = fieldSpec.getName(); switch (fieldType) { case DIMENSION:
/**
 * Creates a sorter over the given column readers.
 *
 * <p>Every schema field whose field type is not METRIC is counted as a dimension, and its name is recorded in
 * schema iteration order.
 *
 * @param numDocs value stored as the number of documents
 * @param schema schema whose field specs are scanned for dimensions
 * @param columnReaderMap column readers, stored as given
 */
public PinotSegmentSorter(int numDocs, Schema schema, Map<String, PinotSegmentColumnReader> columnReaderMap) {
  _numDocs = numDocs;
  _schema = schema;
  _columnReaderMap = columnReaderMap;
  _dimensionNames = new ArrayList<>();

  for (FieldSpec spec : _schema.getAllFieldSpecs()) {
    if (spec.getFieldType() == FieldSpec.FieldType.METRIC) {
      // Metrics are the only fields not treated as dimensions.
      continue;
    }
    _numDimensions++;
    _dimensionNames.add(spec.getName());
  }
}
/**
 * Builds the field spec for a column based on the generator spec.
 *
 * <p>The concrete spec class is chosen from the column's field type (DIMENSION, METRIC or TIME). Afterwards the
 * name, data type and single-value flag ({@code true}) are set on the spec.
 *
 * @param genSpec generator spec providing the per-column data type, field type and time unit maps
 * @param column the column to build the spec for
 * @return the populated field spec
 * @throws RuntimeException if the field type is none of DIMENSION, METRIC or TIME
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
  DataType columnDataType = genSpec.getDataTypesMap().get(column);
  FieldType columnFieldType = genSpec.getFieldTypesMap().get(column);

  final FieldSpec result;
  switch (columnFieldType) {
    case DIMENSION:
      result = new DimensionFieldSpec();
      break;
    case METRIC:
      result = new MetricFieldSpec();
      break;
    case TIME:
      result = new TimeFieldSpec(column, columnDataType, genSpec.getTimeUnitMap().get(column));
      break;
    default:
      throw new RuntimeException("Invalid Field type.");
  }

  result.setName(column);
  result.setDataType(columnDataType);
  result.setSingleValueField(true);
  return result;
}
if (fieldSpec.getFieldType() != FieldSpec.FieldType.METRIC) { String dimensionName = fieldSpec.getName(); _numDimensions++; _dimensionNames.add(dimensionName); _dimensionStarValues.add(fieldSpec.getDefaultNullValue()); _dimensionDictionaries.add(HashBiMap.create());
/**
 * Returns the sorted flag for single-value fields; always {@code false} for multi-value fields.
 */
@Override
public boolean isSorted() {
  if (!fieldSpec.isSingleValueField()) {
    return false;
  }
  return isSorted;
}
/**
 * Logs the collected statistics of every column at INFO level: min/max value, cardinality, length of the largest
 * element, sorted flag, data type and, when a partition function is present, the partitions.
 *
 * <p>Any exception raised while logging is caught and reported at ERROR level instead of being propagated.
 */
@Override
public void logStats() {
  try {
    for (String columnName : columnStatsCollectorMap.keySet()) {
      AbstractColumnStatisticsCollector collector = columnStatsCollectorMap.get(columnName);

      LOGGER.info("********** logging for column : " + columnName + " ********************* ");
      LOGGER.info("min value : " + collector.getMinValue());
      LOGGER.info("max value : " + collector.getMaxValue());
      LOGGER.info("cardinality : " + collector.getCardinality());
      LOGGER.info("length of largest column : " + collector.getLengthOfLargestElement());
      LOGGER.info("is sorted : " + collector.isSorted());

      Object columnDataType = _statsCollectorConfig.getSchema().getFieldSpecFor(columnName).getDataType();
      LOGGER.info("column type : " + columnDataType);

      if (collector.getPartitionFunction() != null) {
        LOGGER.info("partitions: " + collector.getPartitions().toString());
      }

      LOGGER.info("***********************************************");
    }
  } catch (Exception e) {
    LOGGER.error("Caught exception while logging column stats", e);
  }
}
}
Object otherVal = o.getRow().getValue(column); Object thisVal = _row.getValue(column); if (fieldSpec.isSingleValueField()) { switch (fieldSpec.getDataType()) { case INT: compare = ((Integer) thisVal).compareTo((Integer) otherVal);
/** * Complete the stats gathering process and store the stats information in indexCreationInfoMap. */ void buildIndexCreationInfo() throws Exception { for (FieldSpec spec : dataSchema.getAllFieldSpecs()) { String column = spec.getName(); // Skip adding virtual columns, so that they don't get an on-disk representation if (dataSchema.isVirtualColumn(column)) { continue; } ColumnStatistics columnProfile = segmentStats.getColumnProfileFor(column); indexCreationInfoMap.put(column, new ColumnIndexCreationInfo(columnProfile, true/*createDictionary*/, ForwardIndexType.FIXED_BIT_COMPRESSED, InvertedIndexType.ROARING_BITMAPS, false/*isAutoGenerated*/, dataSchema.getFieldSpecFor(column).getDefaultNullValue())); } segmentIndexCreationInfo.setTotalDocs(totalDocs); segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs); segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs); segmentIndexCreationInfo.setStarTreeEnabled(createStarTree); }