/**
 * Copies the byte[] value of each requested document into {@code outValues}.
 *
 * <p>Reads {@code inDocIdsSize} document ids from {@code inDocIds} starting at {@code inStartPos}
 * and writes one value per document into {@code outValues}, starting at {@code outStartPos}.
 *
 * @param inDocIds Array holding the document ids to look up
 * @param inStartPos Index in {@code inDocIds} of the first document id to read
 * @param inDocIdsSize Number of document ids to read
 * @param outValues Destination array for the values
 * @param outStartPos Index in {@code outValues} of the first slot to fill
 * @throws UnsupportedOperationException if the data type of this value set is not BYTES
 */
@Override
public void getBytesValues(int[] inDocIds, int inStartPos, int inDocIdsSize, byte[][] outValues, int outStartPos) {
  int inEndPos = inStartPos + inDocIdsSize;
  // The reader context is created before the type check, exactly as before.
  ReaderContext context = _reader.createContext();

  if (!_dataType.equals(DataType.BYTES)) {
    throw new UnsupportedOperationException();
  }

  int outPos = outStartPos;
  for (int inPos = inStartPos; inPos < inEndPos; inPos++, outPos++) {
    outValues[outPos] = _reader.getBytes(inDocIds[inPos], context);
  }
}
@Override public int getLengthOfLargestElement() { // Length of longest string int maximumStringLength = 0; // If this column is a string/bytes column, iterate over the dictionary to find the maximum length FieldSpec.DataType dataType = _dataSource.getDataSourceMetadata().getDataType(); final int length = _dictionaryReader.length(); if (dataType.equals(FieldSpec.DataType.STRING)) { for (int i = 0; i < length; i++) { maximumStringLength = Math.max(_dictionaryReader.getStringValue(i).length(), maximumStringLength); } } else if (dataType.equals(FieldSpec.DataType.BYTES)) { for (int i = 0; i < length; i++) { maximumStringLength = Math.max(_dictionaryReader.getBytesValue(i).length, maximumStringLength); } } return maximumStringLength; }
@Override public int getLengthOfShortestElement() { // Length of longest string int minStringLength = Integer.MAX_VALUE; // If this column is a string/bytes column, iterate over the dictionary to find the maximum length FieldSpec.DataType dataType = _dataSource.getDataSourceMetadata().getDataType(); final int length = _dictionaryReader.length(); if (dataType.equals(FieldSpec.DataType.STRING)) { for (int i = 0; i < length; i++) { minStringLength = Math.min(_dictionaryReader.getStringValue(i).length(), minStringLength); } } else if (dataType.equals(FieldSpec.DataType.BYTES)) { for (int i = 0; i < length; i++) { minStringLength = Math.min(_dictionaryReader.getBytesValue(i).length, minStringLength); } } return minStringLength; }
Object sortedValues = dictionary.getSortedValues(); List<Comparable> actualSortedValues = (dataType.equals(FieldSpec.DataType.STRING) || dataType.equals(FieldSpec.DataType.BYTES)) ? Arrays .asList((Comparable[]) dictionary.getSortedValues()) : primitiveArrayToList(dataType, sortedValues); Assert.assertEquals(actualSortedValues, expectedSortedValues); if (!dataType.equals(FieldSpec.DataType.BYTES)) { for (int i = 0; i < dictionary.length(); i++) { Assert.assertTrue(dictionary.inRange(expectedMin.toString(), expectedMax.toString(), i, true, true));
/**
 * Decides whether a dictionary should be created for a column.
 *
 * <p>The decision is taken in this order:
 * <ol>
 *   <li>If the SegmentGeneratorConfig lists the column as a raw index column, or has a raw index
 *       compression type for it, no dictionary is created. This is only allowed for single-valued
 *       columns.</li>
 *   <li>Otherwise, if the column is of type BYTES and the creation info reports it as not
 *       fixed-length, no dictionary is created.</li>
 *   <li>Otherwise, the ColumnIndexCreationInfo decides.</li>
 * </ol>
 *
 * @param info Column index creation info
 * @param config Segment generation config
 * @param spec Field spec for the column
 * @return True if dictionary should be created for the column, false otherwise
 * @throws RuntimeException if a raw (no-dictionary) index is requested for a multi-valued column
 */
private boolean createDictionaryForColumn(ColumnIndexCreationInfo info, SegmentGeneratorConfig config,
    FieldSpec spec) {
  String column = spec.getName();

  // The segment generator config takes precedence over the column creation info.
  boolean rawIndexRequested = config.getRawIndexCreationColumns().contains(column)
      || config.getRawIndexCompressionType().containsKey(column);
  if (rawIndexRequested) {
    if (spec.isSingleValueField()) {
      return false;
    }
    throw new RuntimeException(
        "Creation of indices without dictionaries is supported for single valued columns only.");
  }

  boolean variableLengthBytes = spec.getDataType().equals(FieldSpec.DataType.BYTES) && !info.isFixedLength();
  return !variableLengthBytes && info.isCreateDictionary();
}
private boolean shouldConvertColumn(FieldSpec fieldSpec) { String columnName = fieldSpec.getName(); FieldSpec.DataType dataType = fieldSpec.getDataType(); int numTotalDocs = _originalSegmentMetadata.getTotalDocs(); ColumnMetadata columnMetadata = _originalSegmentMetadata.getColumnMetadataFor(columnName); int cardinality = columnMetadata.getCardinality(); // In bits int lengthOfEachEntry; if (dataType.equals(FieldSpec.DataType.STRING)) { lengthOfEachEntry = columnMetadata.getColumnMaxLength() * Byte.SIZE; } else { lengthOfEachEntry = dataType.size() * Byte.SIZE; } long dictionaryBasedIndexSize = (long) numTotalDocs * columnMetadata.getBitsPerElement() + (long) cardinality * lengthOfEachEntry; long rawIndexSize = (long) numTotalDocs * lengthOfEachEntry; LOGGER.info( "For column: {}, size of dictionary based index: {} bits, size of raw index (without compression): {} bits", columnName, dictionaryBasedIndexSize, rawIndexSize); return rawIndexSize <= dictionaryBasedIndexSize * CONVERSION_THRESHOLD; }
@Override public Serializable getValue(int docId) { bvIter.skipTo(docId); if (_dataType.equals(FieldSpec.DataType.BYTES)) { // byte[] is converted to equivalent Hex String for selection queries. byte[] bytes = bvIter.nextBytesVal(); return ByteArray.toHexString(bytes); } else { return bvIter.nextStringVal(); } } }
public StringSelectionColumnIterator(Block block) { _dataType = block.getMetadata().getDataType(); Preconditions .checkArgument(_dataType.equals(FieldSpec.DataType.STRING) || _dataType.equals(FieldSpec.DataType.BYTES), "Illegal data type for StringSelectionColumnIterator: " + _dataType); bvIter = (BlockSingleValIterator) block.getBlockValueSet().iterator(); }
@Override public Serializable getValue(int docId) { _blockSingleValIterator.skipTo(docId); // For selection, we convert BYTES data type to equivalent HEX string. if (_dataType.equals(FieldSpec.DataType.BYTES)) { return ByteArray.toHexString(_dictionary.getBytesValue(_blockSingleValIterator.nextIntVal())); } return (Serializable) _dictionary.get(_blockSingleValIterator.nextIntVal()); } }