public MetricFieldSpec(@Nonnull String name, @Nonnull DataType dataType, @Nonnull Object defaultNullValue) { super(name, dataType, true, defaultNullValue); _fieldSize = _dataType.size(); }
/**
 * Sets the field size after validating it.
 *
 * <p>The size must be strictly positive. When a data type is already set and it is neither
 * {@code STRING} nor {@code BYTES}, the size must additionally equal {@code _dataType.size()}.
 * {@code STRING} and {@code BYTES} are exempt from the equality check: they are the same two
 * types for which {@code setDataType(...)} does not derive a field size from the data type,
 * so the caller-supplied size is the only source for them.
 *
 * @param fieldSize the field size to store; must be greater than zero
 * @throws IllegalArgumentException if {@code fieldSize} is not positive, or does not match the
 *     size of the current data type when that check applies
 */
public void setFieldSize(int fieldSize) {
  Preconditions.checkArgument(fieldSize > 0, "Field size: " + fieldSize + " is not a positive number.");

  // Only compare against the data type's own size for types that setDataType(...) also sizes
  // from the data type; previously BYTES fell into this check even though it is skipped there.
  final boolean sizeComesFromDataType =
      _dataType != null && _dataType != DataType.STRING && _dataType != DataType.BYTES;
  if (sizeComesFromDataType) {
    Preconditions.checkArgument(fieldSize == _dataType.size(),
        "Field size: " + fieldSize + " does not match data type: " + _dataType);
  }

  _fieldSize = fieldSize;
}
public MetricFieldSpec(@Nonnull String name, @Nonnull DataType dataType) { super(name, dataType, true); _fieldSize = _dataType.size(); }
int indexColumnSize = FieldSpec.DataType.INT.size(); if (noDictionaryColumns.contains(column) && fieldSpec.isSingleValueField() && dataType != FieldSpec.DataType.STRING && !invertedIndexColumns.contains(column)) { indexColumnSize = dataType.size(); } else { int dictionaryColumnSize; dictionaryColumnSize = _statsHistory.getEstimatedAvgColSize(column); } else { dictionaryColumnSize = dataType.size();
/**
 * Verifies three properties of {@link FieldSpec.DataType}: the stored type reported by
 * {@code getStoredType()}, the byte width reported by {@code size()} for the four numeric
 * types, and the mapping performed by {@code valueOf(Schema.Type)}.
 */
@Test
public void testDataType() {
  // Stored type: every type maps to itself except BOOLEAN, which is stored as STRING.
  Assert.assertEquals(INT.getStoredType(), INT);
  Assert.assertEquals(LONG.getStoredType(), LONG);
  Assert.assertEquals(FLOAT.getStoredType(), FLOAT);
  Assert.assertEquals(DOUBLE.getStoredType(), DOUBLE);
  Assert.assertEquals(BOOLEAN.getStoredType(), STRING);
  Assert.assertEquals(STRING.getStoredType(), STRING);
  Assert.assertEquals(BYTES.getStoredType(), BYTES);

  // Byte width: numeric types report the width of the matching Java primitive.
  Assert.assertEquals(INT.size(), Integer.BYTES);
  Assert.assertEquals(LONG.size(), Long.BYTES);
  Assert.assertEquals(FLOAT.size(), Float.BYTES);
  Assert.assertEquals(DOUBLE.size(), Double.BYTES);

  // Conversion from Schema.Type: BOOLEAN and ENUM both resolve to STRING.
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.INT), INT);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.LONG), LONG);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.FLOAT), FLOAT);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.DOUBLE), DOUBLE);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.BOOLEAN), STRING);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.STRING), STRING);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.ENUM), STRING);
  Assert.assertEquals(FieldSpec.DataType.valueOf(Schema.Type.BYTES), BYTES);
}
private boolean shouldConvertColumn(FieldSpec fieldSpec) { String columnName = fieldSpec.getName(); FieldSpec.DataType dataType = fieldSpec.getDataType(); int numTotalDocs = _originalSegmentMetadata.getTotalDocs(); ColumnMetadata columnMetadata = _originalSegmentMetadata.getColumnMetadataFor(columnName); int cardinality = columnMetadata.getCardinality(); // In bits int lengthOfEachEntry; if (dataType.equals(FieldSpec.DataType.STRING)) { lengthOfEachEntry = columnMetadata.getColumnMaxLength() * Byte.SIZE; } else { lengthOfEachEntry = dataType.size() * Byte.SIZE; } long dictionaryBasedIndexSize = (long) numTotalDocs * columnMetadata.getBitsPerElement() + (long) cardinality * lengthOfEachEntry; long rawIndexSize = (long) numTotalDocs * lengthOfEachEntry; LOGGER.info( "For column: {}, size of dictionary based index: {} bits, size of raw index (without compression): {} bits", columnName, dictionaryBasedIndexSize, rawIndexSize); return rawIndexSize <= dictionaryBasedIndexSize * CONVERSION_THRESHOLD; }
/**
 * Builds a {@link FixedByteSingleColumnMultiValueReaderWriter} whose average multi-value count
 * and rows-per-chunk are drawn from the supplied random source.
 *
 * @param dataType data type whose {@code size()} is passed as the per-value width
 * @param r random source; consulted twice, first for the average multi-value count and then
 *     for the rows-per-chunk value
 * @param rows upper bound (inclusive, after the +1 shift) for the rows-per-chunk draw
 * @param maxNumberOfMultiValuesPerRow passed through as the maximum, and also the upper bound
 *     (inclusive, after the +1 shift) for the average multi-value count draw
 * @return a new reader-writer backed by {@code _memoryManager}
 */
private FixedByteSingleColumnMultiValueReaderWriter createReaderWriter(
    FieldSpec.DataType dataType,
    Random r,
    int rows,
    int maxNumberOfMultiValuesPerRow) {
  // Draw order matters for reproducibility with a seeded Random: average count first, chunk size second.
  final int averageValuesPerRow = 1 + r.nextInt(maxNumberOfMultiValuesPerRow);
  final int rowsPerChunk = 1 + r.nextInt(rows);
  final int valueWidth = dataType.size();

  return new FixedByteSingleColumnMultiValueReaderWriter(
      maxNumberOfMultiValuesPerRow,
      averageValuesPerRow,
      rowsPerChunk,
      valueWidth,
      _memoryManager,
      "ReaderWriter");
}
/**
 * Runs the superclass post-injection step and then refreshes {@code _fieldSize} from the
 * current data type's {@code size()}.
 */
@Override
public void postInject() {
  super.postInject();

  // Recompute after the superclass hook so the size reflects the `_dataType` in effect afterwards.
  final int sizeFromType = _dataType.size();
  _fieldSize = sizeFromType;
}
} // closes an enclosing scope whose opening lies outside this view
/**
 * Delegates to the superclass setter and then, unless the resulting data type is
 * {@code STRING} or {@code BYTES}, updates {@code _fieldSize} to that type's {@code size()}.
 *
 * @param dataType the data type to set, forwarded to the superclass
 */
@Override
public void setDataType(@Nonnull DataType dataType) {
  super.setDataType(dataType);

  // For STRING and BYTES the existing field size is left untouched.
  final boolean isStringOrBytes = _dataType == DataType.STRING || _dataType == DataType.BYTES;
  if (!isStringOrBytes) {
    _fieldSize = _dataType.size();
  }
}