private int compareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength) { if (lastFieldNumber != fieldNumber) { final int cmp = fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber)); // If there is a field named "" (empty string) then we // will get 0 on this comparison, yet, it's "OK". But // it's not OK if two different field numbers map to // the same name. if (cmp != 0 || lastFieldNumber != -1) return cmp; } UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1); UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2); final int len; if (utf16Result1.length < utf16Result2.length) len = utf16Result1.length; else len = utf16Result2.length; for(int i=0;i<len;i++) { final char ch1 = utf16Result1.result[i]; final char ch2 = utf16Result2.result[i]; if (ch1 != ch2) return ch1-ch2; } return utf16Result1.length - utf16Result2.length; }
private void setMatchingSegmentReaders() { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: matchingSegmentReaders = new SegmentReader[readers.size()]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: for (int i = 0; i < readers.size(); i++) { IndexReader reader = (IndexReader) readers.get(i); if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; boolean same = true; FieldInfos segmentFieldInfos = segmentReader.getFieldInfos(); for (int j = 0; same && j < segmentFieldInfos.size(); j++) same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j)); if (same) matchingSegmentReaders[i] = segmentReader; } } // Used for bulk-reading raw bytes for stored fields rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS]; }
private int compareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength) { if (lastFieldNumber != fieldNumber) { final int cmp = fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber)); // If there is a field named "" (empty string) then we // will get 0 on this comparison, yet, it's "OK". But // it's not OK if two different field numbers map to // the same name. if (cmp != 0 || lastFieldNumber != -1) return cmp; } UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1); UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2); final int len; if (utf16Result1.length < utf16Result2.length) len = utf16Result1.length; else len = utf16Result2.length; for(int i=0;i<len;i++) { final char ch1 = utf16Result1.result[i]; final char ch2 = utf16Result2.result[i]; if (ch1 != ch2) return ch1-ch2; } return utf16Result1.length - utf16Result2.length; }
private void setMatchingSegmentReaders() { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: matchingSegmentReaders = new SegmentReader[readers.size()]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: for (int i = 0; i < readers.size(); i++) { IndexReader reader = (IndexReader) readers.get(i); if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; boolean same = true; FieldInfos segmentFieldInfos = segmentReader.getFieldInfos(); for (int j = 0; same && j < segmentFieldInfos.size(); j++) same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j)); if (same) matchingSegmentReaders[i] = segmentReader; } } // Used for bulk-reading raw bytes for stored fields rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS]; }
/**
 * Reads fieldCount field numbers from tvd and resolves each to its name.
 *
 * When format is at least FORMAT_VERSION each VInt is taken as the field
 * number itself; otherwise each VInt is added to the running number.
 *
 * @param fieldCount how many field numbers to read
 * @return the resolved field names, in the order read
 * @throws IOException if reading from tvd fails
 */
final private String[] readFields(int fieldCount) throws IOException {
  int current = 0;
  final String[] names = new String[fieldCount];
  int slot = 0;
  while (slot < fieldCount) {
    current = (format >= FORMAT_VERSION)
        ? tvd.readVInt()
        : current + tvd.readVInt();
    names[slot] = fieldInfos.fieldName(current);
    slot++;
  }
  return names;
}
/**
 * Decodes a run of field numbers from tvd into field names.
 *
 * For format >= FORMAT_VERSION the stored values are used directly as field
 * numbers; for earlier formats they are accumulated onto the previous number.
 *
 * @param fieldCount number of entries to decode
 * @return array of field names looked up through fieldInfos
 * @throws IOException if the underlying read fails
 */
final private String[] readFields(int fieldCount) throws IOException {
  int fieldNo = 0;
  final String[] result = new String[fieldCount];
  for (int idx = 0; idx != result.length; idx++) {
    if (format < FORMAT_VERSION) {
      // Older layout: value is an increment over the previous field number.
      fieldNo += tvd.readVInt();
    } else {
      // Current layout: value is the field number.
      fieldNo = tvd.readVInt();
    }
    result[idx] = fieldInfos.fieldName(fieldNo);
  }
  return result;
}
/**
 * Reads the next term from input.
 *
 * The stream supplies an offset and a count; count chars are read into buffer
 * at that offset, so the chars already in buffer before the offset are kept
 * and become the leading part of the term text. A field number follows and is
 * resolved to a name through fieldInfos.
 *
 * @return the decoded Term
 * @throws IOException if reading from input fails
 */
private final Term readTerm() throws IOException {
  final int keptChars = input.readVInt();
  final int newChars = input.readVInt();
  final int textLength = keptChars + newChars;

  if (textLength > buffer.length) {
    growBuffer(textLength);
  }
  input.readChars(buffer, keptChars, newChars);

  // The field number is stored after the text; resolve it before building the text.
  final String termField = fieldInfos.fieldName(input.readVInt());
  final String termText = new String(buffer, 0, textLength);
  return new Term(termField, termText, false);
}
/** Adds a new <<fieldNumber, termBytes>, TermInfo> pair to the set. Term must be lexicographically greater than all previous Terms added. TermInfo pointers must be positive and greater than all previous.*/ void add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti) throws IOException { assert compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0) : "Terms are out of order: field=" + fieldInfos.fieldName(fieldNumber) + " (number " + fieldNumber + ")" + " lastField=" + fieldInfos.fieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" + " text=" + new String(termBytes, 0, termBytesLength, "UTF-8") + " lastText=" + new String(lastTermBytes, 0, lastTermBytesLength, "UTF-8"); assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")"; assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")"; if (!isIndex && size % indexInterval == 0) other.add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term writeTerm(fieldNumber, termBytes, termBytesLength); // write term output.writeVInt(ti.docFreq); // write doc freq output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.proxPointer - lastTi.proxPointer); if (ti.docFreq >= skipInterval) { output.writeVInt(ti.skipOffset); } if (isIndex) { output.writeVLong(other.output.getFilePointer() - lastIndexPointer); lastIndexPointer = other.output.getFilePointer(); // write pointer } lastFieldNumber = fieldNumber; lastTi.set(ti); size++; }
/** Adds a new <<fieldNumber, termBytes>, TermInfo> pair to the set. Term must be lexicographically greater than all previous Terms added. TermInfo pointers must be positive and greater than all previous.*/ void add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti) throws IOException { assert compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0) : "Terms are out of order: field=" + fieldInfos.fieldName(fieldNumber) + " (number " + fieldNumber + ")" + " lastField=" + fieldInfos.fieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" + " text=" + new String(termBytes, 0, termBytesLength, "UTF-8") + " lastText=" + new String(lastTermBytes, 0, lastTermBytesLength, "UTF-8"); assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")"; assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")"; if (!isIndex && size % indexInterval == 0) other.add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term writeTerm(fieldNumber, termBytes, termBytesLength); // write term output.writeVInt(ti.docFreq); // write doc freq output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.proxPointer - lastTi.proxPointer); if (ti.docFreq >= skipInterval) { output.writeVInt(ti.skipOffset); } if (isIndex) { output.writeVLong(other.output.getFilePointer() - lastIndexPointer); lastIndexPointer = other.output.getFilePointer(); // write pointer } lastFieldNumber = fieldNumber; lastTi.set(ti); size++; }
// Store, at index i of fields, the name fieldInfos returns for field number `number`.
fields[i] = fieldInfos.fieldName(number);
public final void read(IndexInput input, FieldInfos fieldInfos) throws IOException { this.term = null; // invalidate cache int start = input.readVInt(); int length = input.readVInt(); int totalLength = start + length; if (preUTF8Strings) { text.setLength(totalLength); input.readChars(text.result, start, length); } else { if (dirty) { // Fully convert all bytes since bytes is dirty UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes); bytes.setLength(totalLength); input.readBytes(bytes.result, start, length); UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text); dirty = false; } else { // Incrementally convert only the UTF8 bytes that are new: bytes.setLength(totalLength); input.readBytes(bytes.result, start, length); UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text); } } this.field = fieldInfos.fieldName(input.readVInt()); }
public final void read(IndexInput input, FieldInfos fieldInfos) throws IOException { this.term = null; // invalidate cache int start = input.readVInt(); int length = input.readVInt(); int totalLength = start + length; if (preUTF8Strings) { text.setLength(totalLength); input.readChars(text.result, start, length); } else { if (dirty) { // Fully convert all bytes since bytes is dirty UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes); bytes.setLength(totalLength); input.readBytes(bytes.result, start, length); UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text); dirty = false; } else { // Incrementally convert only the UTF8 bytes that are new: bytes.setLength(totalLength); input.readBytes(bytes.result, start, length); UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text); } } this.field = fieldInfos.fieldName(input.readVInt()); }