private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException { switch (b) { case 0: return IndexOptions.NONE; case 1: return IndexOptions.DOCS; case 2: return IndexOptions.DOCS_AND_FREQS; case 3: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; case 4: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; default: // BUG throw new CorruptIndexException("invalid IndexOptions byte: " + b, input); } }
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException { switch (b) { case 0: return IndexOptions.NONE; case 1: return IndexOptions.DOCS; case 2: return IndexOptions.DOCS_AND_FREQS; case 3: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; case 4: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; default: // BUG throw new CorruptIndexException("invalid IndexOptions byte: " + b, input); } }
/**
 * Maps a single encoded byte to the corresponding {@link DocValuesType} constant.
 *
 * @param input passed to the exception so the failure can be attributed to its source
 * @param b the encoded value; codes 0 through 5 are recognised
 * @return the {@link DocValuesType} constant for {@code b}
 * @throws CorruptIndexException if {@code b} is not a recognised code
 */
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
  final DocValuesType decoded;
  switch (b) {
    case 0:
      decoded = DocValuesType.NONE;
      break;
    case 1:
      decoded = DocValuesType.NUMERIC;
      break;
    case 2:
      decoded = DocValuesType.BINARY;
      break;
    case 3:
      decoded = DocValuesType.SORTED;
      break;
    case 4:
      decoded = DocValuesType.SORTED_SET;
      break;
    case 5:
      decoded = DocValuesType.SORTED_NUMERIC;
      break;
    default:
      throw new CorruptIndexException("invalid docvalues byte: " + b, input);
  }
  return decoded;
}
/**
 * Maps a single encoded byte to the corresponding {@link DocValuesType} constant.
 *
 * @param input passed to the exception so the failure can be attributed to its source
 * @param b the encoded value; codes 0 through 5 are recognised
 * @return the {@link DocValuesType} constant for {@code b}
 * @throws CorruptIndexException if {@code b} is not a recognised code
 */
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
  final DocValuesType decoded;
  switch (b) {
    case 0:
      decoded = DocValuesType.NONE;
      break;
    case 1:
      decoded = DocValuesType.NUMERIC;
      break;
    case 2:
      decoded = DocValuesType.BINARY;
      break;
    case 3:
      decoded = DocValuesType.SORTED;
      break;
    case 4:
      decoded = DocValuesType.SORTED_SET;
      break;
    case 5:
      decoded = DocValuesType.SORTED_NUMERIC;
      break;
    default:
      throw new CorruptIndexException("invalid docvalues byte: " + b, input);
  }
  return decoded;
}
/**
 * Returns the next position reported by the current sub-postings after checking that it is
 * within the legal range.
 *
 * @return the position, which is neither negative nor greater than
 *     {@code IndexWriter.MAX_POSITION}
 * @throws CorruptIndexException if the position is negative or exceeds
 *     {@code IndexWriter.MAX_POSITION}
 */
@Override
public int nextPosition() throws IOException {
  int pos = current.postings.nextPosition();
  if (pos < 0) {
    // The field name is quoted on both sides, matching the "too large" message below.
    throw new CorruptIndexException(
        "position=" + pos + " is negative, field=\"" + field + "\" doc=" + current.mappedDocID,
        current.postings.toString());
  } else if (pos > IndexWriter.MAX_POSITION) {
    throw new CorruptIndexException(
        "position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION
            + "), field=\"" + field + "\" doc=" + current.mappedDocID,
        current.postings.toString());
  }
  return pos;
}
/**
 * Reads one byte from {@code in} and returns it as the compressed dimension, after range-checking
 * it against {@code numDataDims}.
 *
 * @param in the input to read the byte from
 * @return the value read, which is at least -1 and less than {@code numDataDims}
 * @throws CorruptIndexException if the value lies outside that range
 */
private int readCompressedDim(IndexInput in) throws IOException {
  final int dim = in.readByte();
  final boolean inRange = dim >= -1 && dim < numDataDims;
  if (inRange == false) {
    throw new CorruptIndexException("Got compressedDim="+dim, in);
  }
  return dim;
}
/**
 * Reads a CRC32 value, stored as a 64-bit long, from the input.
 *
 * @param input the input to read the long from
 * @return the value read; its upper 32 bits are guaranteed to be zero
 * @throws CorruptIndexException if any of the upper 32 bits is set
 * @throws IOException if an i/o error occurs
 */
static long readCRC(IndexInput input) throws IOException {
  final long crc = input.readLong();
  // A 32-bit checksum must leave the high half of the long empty.
  final long highBits = crc >>> 32;
  if (highBits != 0) {
    throw new CorruptIndexException("Illegal CRC-32 checksum: " + crc, input);
  }
  return crc;
}
/**
 * Expert: reads the suffix of an index header and verifies it against the expected value.
 * <p>
 * The suffix is read as one unsigned length byte followed by that many bytes, decoded as UTF-8.
 *
 * @param in the input to read from
 * @param expectedSuffix the suffix the file is required to carry
 * @return the suffix that was read (equal to {@code expectedSuffix})
 * @throws CorruptIndexException if the suffix read differs from {@code expectedSuffix}
 */
public static String checkIndexHeaderSuffix(DataInput in, String expectedSuffix) throws IOException {
  final int length = in.readByte() & 0xFF;
  final byte[] raw = new byte[length];
  in.readBytes(raw, 0, length);
  final String actualSuffix = new String(raw, StandardCharsets.UTF_8);
  if (actualSuffix.equals(expectedSuffix)) {
    return actualSuffix;
  }
  throw new CorruptIndexException("file mismatch, expected suffix=" + expectedSuffix + ", got=" + actualSuffix, in);
}
/**
 * Checks that {@code in} is positioned exactly {@code footerLength()} bytes before its end and
 * that the next two ints are the footer magic and a zero algorithm ID.
 *
 * @param in the input, expected to be positioned at the start of the footer
 * @throws CorruptIndexException if the remaining byte count differs from the footer length, the
 *     magic does not equal {@code FOOTER_MAGIC}, or the algorithm ID is non-zero
 */
private static void validateFooter(IndexInput in) throws IOException {
  final long remaining = in.length() - in.getFilePointer();
  final long expected = footerLength();
  if (remaining != expected) {
    if (remaining < expected) {
      throw new CorruptIndexException("misplaced codec footer (file truncated?): remaining=" + remaining + ", expected=" + expected + ", fp=" + in.getFilePointer(), in);
    }
    throw new CorruptIndexException("misplaced codec footer (file extended?): remaining=" + remaining + ", expected=" + expected + ", fp=" + in.getFilePointer(), in);
  }

  final int magic = in.readInt();
  if (magic != FOOTER_MAGIC) {
    throw new CorruptIndexException("codec footer mismatch (file truncated?): actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC, in);
  }

  final int algorithmID = in.readInt();
  if (algorithmID != 0) {
    throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID, in);
  }
}
@Override public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException { assert offset + length <= originalLength; // add 7 padding bytes, this is not necessary but can help decompression run faster if (bytes.bytes.length < originalLength + 7) { bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)]; } final int decompressedLength = LZ4.decompress(in, offset + length, bytes.bytes, 0); if (decompressedLength > originalLength) { throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in); } bytes.offset = offset; bytes.length = length; }
/**
 * Expert: reads the object ID of an index header and verifies it against the expected ID.
 * <p>
 * Exactly {@code StringHelper.ID_LENGTH} bytes are read from {@code in}.
 *
 * @param in the input to read from
 * @param expectedID the ID the file is required to carry
 * @return the ID that was read
 * @throws CorruptIndexException if the ID read differs from {@code expectedID}
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);
  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID)
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}
/**
 * Validates the codec footer previously written by {@link #writeFooter}.
 * <p>
 * The footer structure is validated first; the checksum reported by {@code in} is then compared
 * with the CRC value stored in the footer.
 *
 * @param in the checksumming input
 * @return actual checksum value
 * @throws IOException if the footer is invalid, if the checksum does not match,
 *                     or if {@code in} is not properly positioned before the footer
 *                     at the end of the stream.
 */
public static long checkFooter(ChecksumIndexInput in) throws IOException {
  validateFooter(in);
  // NOTE(review): call order is kept exactly as in the original; presumably the running checksum
  // has to be sampled before the stored CRC bytes are consumed. Confirm before reordering.
  final long computed = in.getChecksum();
  final long stored = readCRC(in);
  if (stored == computed) {
    return computed;
  }
  throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(stored) +
                                                   " actual=" + Long.toHexString(computed), in);
}
/**
 * Reads a length-prefixed byte sequence: a vInt byte count followed by that many bytes.
 *
 * @param in the input to read from
 * @return a new {@link BytesRef} whose array holds exactly the bytes read
 * @throws CorruptIndexException if the decoded byte count is negative
 */
private static BytesRef readBytesRef(IndexInput in) throws IOException {
  final int count = in.readVInt();
  if (count < 0) {
    throw new CorruptIndexException("invalid bytes length: " + count, in);
  }

  final byte[] payload = new byte[count];
  in.readBytes(payload, 0, count);

  final BytesRef ref = new BytesRef();
  ref.bytes = payload;
  ref.length = count;
  return ref;
}
/** Confirms that the incoming index sort (if any) matches the existing index sort (if any). * This is unfortunately just best effort, because it could be the old index only has unsorted flushed segments built * before {@link Version#LUCENE_6_5_0} (flushed segments are sorted in Lucene 7.0). */ private void validateIndexSort() throws CorruptIndexException { Sort indexSort = config.getIndexSort(); if (indexSort != null) { for(SegmentCommitInfo info : segmentInfos) { Sort segmentIndexSort = info.info.getIndexSort(); if (segmentIndexSort != null && indexSort.equals(segmentIndexSort) == false) { throw new IllegalArgumentException("cannot change previous indexSort=" + segmentIndexSort + " (from segment=" + info + ") to new indexSort=" + indexSort); } else if (segmentIndexSort == null && info.info.getVersion().onOrAfter(Version.LUCENE_6_5_0)) { // Flushed segments are not sorted if they were built with a version prior to 6.5.0 throw new CorruptIndexException("segment not sorted with indexSort=" + segmentIndexSort, info.info.toString()); } } } }
/**
 * Like {@link #checkHeader(DataInput,String,int,int)} except this version assumes the first
 * int has already been read and validated from the input.
 *
 * @param in the input, positioned at the codec name
 * @param codec the codec name the header must carry
 * @param minVersion the lowest accepted version (inclusive)
 * @param maxVersion the highest accepted version (inclusive)
 * @return the version read from the header
 * @throws CorruptIndexException if the codec name read differs from {@code codec}
 * @throws IndexFormatTooOldException if the version read is below {@code minVersion}
 * @throws IndexFormatTooNewException if the version read is above {@code maxVersion}
 */
public static int checkHeaderNoMagic(DataInput in, String codec, int minVersion, int maxVersion) throws IOException {
  final String foundCodec = in.readString();
  if (foundCodec.equals(codec) == false) {
    throw new CorruptIndexException("codec mismatch: actual codec=" + foundCodec + " vs expected codec=" + codec, in);
  }

  final int foundVersion = in.readInt();
  if (foundVersion < minVersion) {
    throw new IndexFormatTooOldException(in, foundVersion, minVersion, maxVersion);
  } else if (foundVersion > maxVersion) {
    throw new IndexFormatTooNewException(in, foundVersion, minVersion, maxVersion);
  }
  return foundVersion;
}
@Test void shouldRequestIndexPopulationIfTheIndexIsCorrupt() { // Given long faultyIndexId = 1; CorruptIndexException error = new CorruptIndexException( "It's broken.", "" ); LuceneIndexProvider provider = newFaultyIndexProvider( faultyIndexId, error ); // When StoreIndexDescriptor descriptor = forSchema( forLabel( 1, 1 ), provider.getProviderDescriptor() ).withId( faultyIndexId ); InternalIndexState initialState = provider.getInitialState( descriptor ); // Then assertThat( initialState, equalTo(InternalIndexState.POPULATING) ); logProvider.assertAtLeastOnce( loggedException( error ) ); }
/**
 * Returns (but does not validate) the checksum previously written by {@link #writeFooter}.
 * <p>
 * Seeks to the footer at the end of {@code in}, validates the footer structure and reads the
 * stored CRC value; the file contents are not checksummed.
 *
 * @param in the input whose footer is read
 * @return actual checksum value
 * @throws IOException if the footer is invalid
 */
public static long retrieveChecksum(IndexInput in) throws IOException {
  final long fileLength = in.length();
  final long footerLen = footerLength();
  if (fileLength < footerLen) {
    throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLength + " but footerLength==" + footerLen, in);
  }
  in.seek(fileLength - footerLen);
  validateFooter(in);
  return readCRC(in);
}
/**
 * Retrieves the full index header from the provided {@link IndexInput}.
 * This throws {@link CorruptIndexException} if this file does not appear to be an index file.
 * <p>
 * The header is walked once to find its total length (magic, codec name, one int, the ID,
 * the suffix length byte and the suffix), then re-read from offset 0 as raw bytes.
 *
 * @param in the input to read from; it is repositioned by this call
 * @return the raw header bytes
 * @throws CorruptIndexException if the first int is not {@code CODEC_MAGIC}
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }

  final String codecName = in.readString();
  in.readInt(); // value is not needed here; read only to advance past it
  final long afterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(afterId);
  final int suffixLength = in.readByte() & 0xFF;

  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}
/**
 * Retrieves the full footer from the provided {@link IndexInput}. This throws
 * {@link CorruptIndexException} if this file does not have a valid footer.
 * <p>
 * The footer is validated first and then re-read from its start as raw bytes.
 *
 * @param in the input to read from; it is repositioned by this call
 * @return the raw footer bytes, {@code footerLength()} long
 */
public static byte[] readFooter(IndexInput in) throws IOException {
  final long fileLength = in.length();
  final int footerLen = footerLength();
  if (fileLength < footerLen) {
    throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLength + " but footerLength==" + footerLen, in);
  }

  final long footerStart = fileLength - footerLen;
  in.seek(footerStart);
  validateFooter(in);

  // validation consumed part of the footer, so rewind before copying it out
  in.seek(footerStart);
  final byte[] footer = new byte[footerLen];
  in.readBytes(footer, 0, footer.length);
  return footer;
}
/**
 * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}
 * <p>
 * Note that this method may be slow, as it must process the entire file.
 * If you just need to extract the checksum value, call {@link #retrieveChecksum}.
 *
 * @param input the input to verify; the work is done on a clone of it
 * @return the checksum value returned by {@link #checkFooter}
 * @throws CorruptIndexException if the file is shorter than a footer
 */
public static long checksumEntireFile(IndexInput input) throws IOException {
  final IndexInput copy = input.clone();
  copy.seek(0);
  final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(copy);
  assert checksumIn.getFilePointer() == 0;

  final long fileLength = checksumIn.length();
  final long footerLen = footerLength();
  if (fileLength < footerLen) {
    throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLength + " but footerLength==" + footerLen, input);
  }

  checksumIn.seek(fileLength - footerLen);
  return checkFooter(checksumIn);
}