org.apache.lucene.util.BytesRefBuilder java code examples

Refine search

/**
 * Remembers {@code t} as the most recently seen term and, when assertions are
 * enabled, checks that it sorts strictly after the previously remembered term.
 * Passing {@code null} drops the remembered term.
 *
 * @param t the term to check and remember, or {@code null} to reset the state
 * @return always {@code true} (presumably so callers can invoke this from an {@code assert})
 */
private boolean compareToLastTerm(BytesRef t) {
 if (lastTerm == null && t != null) {
  // first term seen: start tracking it
  lastTerm = new BytesRefBuilder();
  lastTerm.append(t);
 } else if (t == null) {
  lastTerm = null;
 } else {
  // Format the term through get(): concatenating the builder itself would not print the term bytes.
  assert lastTerm.get().compareTo(t) < 0: "lastTerm=" + lastTerm.get() + " t=" + t;
  lastTerm.copyBytes(t);
 }
 return true;
}

/** Creates a Term for the given field, taking its bytes from a builder.
 * <p>Be aware that a null field value leads to undefined behavior
 * in most Lucene APIs that accept a Term parameter.
 *
 * @param fld the field of the term
 * @param bytesBuilder the builder whose {@code toBytesRef()} result becomes the term bytes
 */
public Term(String fld, BytesRefBuilder bytesBuilder) {
 this.field = fld;
 bytes = bytesBuilder.toBytesRef();
}

/**
 * Reads the <i>n'th</i> element of this {@link BytesRefArray} into the given builder.
 * @param spare a spare {@link BytesRefBuilder} that receives the element's bytes
 * @param index the index of the element to retrieve
 * @return the <i>n'th</i> element of this {@link BytesRefArray}, as returned by {@code spare.get()}
 */
public BytesRef get(BytesRefBuilder spare, int index) {
 FutureObjects.checkIndex(index, lastElement);
 final int start = offsets[index];
 final int end;
 if (index == lastElement - 1) {
  // the last element ends at currentOffset rather than at a following offsets entry
  end = currentOffset;
 } else {
  end = offsets[index + 1];
 }
 final int byteCount = end - start;
 spare.grow(byteCount);
 spare.setLength(byteCount);
 pool.readBytes(start, spare.bytes(), 0, spare.length());
 return spare.get();
}

/**
 * Overwrite the content of this builder with the given bytes. This has the
 * same effect as {@link #clear()} followed by {@link #append(BytesRef)}.
 *
 * @param ref the bytes that become the new content of this builder
 */
public void copyBytes(BytesRef ref) {
 this.clear();
 this.append(ref);
}

/**
 * Append the current content of another builder to this builder.
 *
 * @param builder the builder whose bytes, obtained through {@code get()}, are appended
 */
public void append(BytesRefBuilder builder) {
 final BytesRef content = builder.get();
 append(content);
}

/**
 * Refreshes {@code builder} from the first {@code termLength} chars of
 * {@code termBuffer} and returns the builder's current view.
 */
@Override
public BytesRef getBytesRef() {
 builder.copyChars(termBuffer, 0, termLength);
 final BytesRef encoded = builder.get();
 return encoded;
}

BytesRefBuilder scratch = new BytesRefBuilder();
   if (scratch.length() > Short.MAX_VALUE-2) {
    throw new IllegalArgumentException(
      "cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length() + ")");
   short analyzedLength = (short) scratch.length();
   output.writeBytes(scratch.bytes(), 0, scratch.length());
 BytesRefBuilder analyzed = new BytesRefBuilder();
 BytesRef surface = new BytesRef();
 IntsRefBuilder scratchInts = new IntsRefBuilder();
 ByteArrayDataInput input = new ByteArrayDataInput();
  analyzed.grow(analyzedLength+2);
  input.readBytes(analyzed.bytes(), 0, analyzedLength);
  analyzed.setLength(analyzedLength);
   previousAnalyzed = new BytesRefBuilder();
   previousAnalyzed.copyBytes(analyzed);
   seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
  } else if (analyzed.get().equals(previousAnalyzed.get())) {
   dedup++;
   if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
  } else {
   dedup = 0;
   previousAnalyzed.copyBytes(analyzed);

BytesRefBuilder flagsScratch = new BytesRefBuilder();
IntsRefBuilder scratchInts = new IntsRefBuilder();
 BytesRef scratch1 = new BytesRef();
 BytesRef scratch2 = new BytesRef();
  } else {
   encodeFlags(flagsScratch, wordForm);
   int ord = flagLookup.add(flagsScratch.get());
   if (ord < 0) {
    Util.toUTF32(currentEntry, scratchInts);
    words.add(scratchInts.get(), currentOrds.get());
 Util.toUTF32(currentEntry, scratchInts);
 words.add(scratchInts.get(), currentOrds.get());
 success2 = true;

/**
 * Moves to document {@code target}: seeks to the file position computed for that
 * document, reads one line and splits it on commas into {@code currentOrds}.
 *
 * @param target the document to position on
 * @return {@code true} if the line read for {@code target} is non-empty after trimming,
 *         {@code false} otherwise
 * @throws IOException if seeking or reading the input fails
 */
@Override
public boolean advanceExact(int target) throws IOException {
 // Widen to long before multiplying: as an int product, target * width can overflow
 // before it is added to the (long) file pointer.
 final long ordEntryWidth = 1L + field.ordPattern.length();
 in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + target * ordEntryWidth);
 SimpleTextUtil.readLine(in, scratch);
 String ordList = scratch.get().utf8ToString().trim();
 // doc is updated even when the document turns out to have no ords
 doc = target;
 if (ordList.isEmpty() == false) {
  currentOrds = ordList.split(",");
  currentIndex = 0;
  return true;
 }
 return false;
}

/**
 * Creates the field data and precomputes its prefix: the bytes of
 * {@code type} followed by a single {@code '#'} byte.
 */
public UidIndexFieldData(Index index, String type, IndexFieldData<?> idFieldData) {
  final BytesRefBuilder typePrefix = new BytesRefBuilder();
  typePrefix.append(new BytesRef(type));
  typePrefix.append((byte) '#');

  this.index = index;
  this.idFieldData = idFieldData;
  this.prefix = typePrefix.toBytesRef();
}

SegmentTermsEnumFrame f = getFrame(ord);
assert f != null;
final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix);
if (f.nextEnt == -1) {
 out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<< BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
 if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) {
  out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
  throw new RuntimeException("seek state is broken");
 BytesRef output = Util.get(fr.index, prefix);
 if (output == null) {
  out.println("      broken seek state: prefix is not final in index");

/**
 * Returns the value decoded as a UTF-8 string when it exists, otherwise {@code null}.
 */
@Override
public Object toObject() {
 // a value that does not exist is expected to be empty
 assert exists || 0 == value.length();
 if (exists) {
  return value.get().utf8ToString();
 }
 return null;
}

final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0));
System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength);
prefix.length = prefixLength;
  assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
  BlockTermState state = term.state;
  final int suffix = term.termBytes.length - prefixLength;
   PendingTerm term = (PendingTerm) ent;
   assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
   BlockTermState state = term.state;
   final int suffix = term.termBytes.length - prefixLength;
  } else {
   PendingBlock block = (PendingBlock) ent;
   assert StringHelper.startsWith(block.prefix, prefix);
   final int suffix = block.prefix.length - prefixLength;
   assert StringHelper.startsWith(block.prefix, prefix);

/**
 * Reads {@code suffix} bytes from the input into the builder starting at
 * position {@code prefix}, leaving the builder with length {@code prefix + suffix}.
 * Bytes before {@code prefix} are not written by this method.
 */
private void readTermBytes(int prefix, int suffix) throws IOException {
 final int totalLength = prefix + suffix;
 builder.grow(totalLength);
 final byte[] target = builder.bytes();
 input.readBytes(target, prefix, suffix);
 builder.setLength(totalLength);
}

/**
 * Stores a private copy of the value of {@code doc} in {@code slot}, or
 * {@code null} when the document has no value.
 */
@Override
public void copy(int slot, int doc) throws IOException {
 final BytesRef docValue = getValueForDoc(doc);
 if (docValue == null) {
  values[slot] = null;
  return;
 }
 // lazily create the per-slot builder, then reuse it for later copies
 BytesRefBuilder holder = tempBRs[slot];
 if (holder == null) {
  holder = new BytesRefBuilder();
  tempBRs[slot] = holder;
 }
 holder.copyBytes(docValue);
 values[slot] = holder.get();
}

/**
 * Reads the checksum of {@code file} from {@code directory} and registers a
 * {@link StoreFileMetaData} entry for it in {@code builder}.
 *
 * @param directory      the directory the file is opened from
 * @param file           the name of the file to inspect
 * @param builder        the map that receives the metadata, keyed by file name
 * @param logger         the logger used to report a failed checksum retrieval
 * @param version        the version recorded in the metadata
 * @param readFileAsHash if {@code true}, the file content is also hashed into the metadata
 *                       and the checksum is verified over the bytes read
 * @throws IOException if the file is shorter than a codec footer or cannot be read
 */
private static void checksumFromLuceneFile(Directory directory, String file, Map<String, StoreFileMetaData> builder,
    Logger logger, Version version, boolean readFileAsHash) throws IOException {
  final String checksum;
  final BytesRefBuilder fileHash = new BytesRefBuilder();
  try (IndexInput in = directory.openInput(file, IOContext.READONCE)) {
    final long length;
    try {
      length = in.length();
      if (length < CodecUtil.footerLength()) {
        // truncated files trigger IAE if we seek negative... these files are really corrupted though
        throw new CorruptIndexException("Can't retrieve checksum from file: " + file + " file length must be >= " +
          CodecUtil.footerLength() + " but was: " + length, in);
      }
      if (readFileAsHash) {
        // additional safety we checksum the entire file we read the hash for...
        final VerifyingIndexInput verifyingIndexInput = new VerifyingIndexInput(in);
        hashFile(fileHash, new InputStreamIndexInput(verifyingIndexInput, length), length);
        checksum = digestToString(verifyingIndexInput.verify());
      } else {
        checksum = digestToString(CodecUtil.retrieveChecksum(in));
      }
    } catch (Exception ex) {
      // log at debug and rethrow unchanged; the message must say the retrieval failed
      logger.debug(() -> new ParameterizedMessage("Can't retrieve checksum from file [{}]", file), ex);
      throw ex;
    }
    builder.put(file, new StoreFileMetaData(file, length, checksum, version, fileHash.get()));
  }
}

/**
 * Scans the input line by line until the END line. For every line that starts
 * with FIELD, maps the field name (the rest of the line, decoded as UTF-8) to
 * the file pointer reached after reading that line. The footer is checked
 * before the map is returned.
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
 final ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
 final BytesRefBuilder scratch = new BytesRefBuilder();
 final TreeMap<String,Long> fields = new TreeMap<>();
 for (;;) {
  SimpleTextUtil.readLine(input, scratch);
  final BytesRef line = scratch.get();
  if (line.equals(END)) {
   SimpleTextUtil.checkFooter(input);
   return fields;
  }
  if (StringHelper.startsWith(line, FIELD)) {
   final int nameStart = FIELD.length;
   final int nameLength = scratch.length() - nameStart;
   final String fieldName = new String(scratch.bytes(), nameStart, nameLength, StandardCharsets.UTF_8);
   fields.put(fieldName, input.getFilePointer());
  }
 }
}

/** Return a two-element array holding the min and max values, under unsigned byte
 *  comparison, of the {@code length} bytes starting at {@code offset} within each
 *  of the given {@link BytesRef}s. Index 0 is the min, index 1 the max. */
private static BytesRef[] computeMinMax(int count, IntFunction<BytesRef> packedValues, int offset, int length) {
 assert length > 0;
 final BytesRefBuilder min = new BytesRefBuilder();
 final BytesRefBuilder max = new BytesRefBuilder();
 // seed both bounds with the first value
 final BytesRef first = packedValues.apply(0);
 final int firstStart = first.offset + offset;
 min.copyBytes(first.bytes, firstStart, length);
 max.copyBytes(first.bytes, firstStart, length);
 for (int i = 1; i < count; ++i) {
  final BytesRef candidate = packedValues.apply(i);
  final int from = candidate.offset + offset;
  final int to = from + length;
  if (FutureArrays.compareUnsigned(min.bytes(), 0, length, candidate.bytes, from, to) > 0) {
   min.copyBytes(candidate.bytes, from, length);
  } else if (FutureArrays.compareUnsigned(max.bytes(), 0, length, candidate.bytes, from, to) < 0) {
   max.copyBytes(candidate.bytes, from, length);
  }
 }
 return new BytesRef[] {min.get(), max.get()};
}

/**
 * Adds every collected surface form for the current analyzed form to the builder,
 * then resets the per-term state.
 *
 * @param defaultWeight the weight to encode for entries whose own weight is {@code -1}
 * @throws IOException if adding an entry to the builder fails
 */
public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
  // append a 0 byte, then make room for one more trailing byte that is rewritten per entry
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());
  for (int i = 0; i < count; i++) {
    // the entry's position doubles as the trailing byte that keeps its key distinct
    analyzed.setByteAt(analyzed.length() - 1, (byte) i);
    Util.toIntsRef(analyzed.get(), scratchInts);
    final SurfaceFormAndPayload candidate = surfaceFormsAndPayload[i];
    final long cost;
    if (candidate.weight == -1) {
      cost = encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight));
    } else {
      cost = candidate.weight;
    }
    builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
  }
  seenSurfaceForms.clear();
  count = 0;
}

/** Copies each int of the given IntsRef into the scratch builder as a single byte
 *  and returns the builder's view; the caller must ensure every int value
 *  fits into a byte. */
public static BytesRef toBytesRef(IntsRef input, BytesRefBuilder scratch) {
 final int count = input.length;
 scratch.grow(count);
 int src = input.offset;
 for (int dst = 0; dst < count; dst++, src++) {
  final int value = input.ints[src];
  // NOTE: we allow -128 to 255
  assert value >= Byte.MIN_VALUE && value <= 255: "value " + value + " doesn't fit into byte";
  scratch.setByteAt(dst, (byte) value);
 }
 scratch.setLength(count);
 return scratch.get();
}

Javadoc

A builder for BytesRef instances.

Most used methods

<init>
Sole constructor.
get
Return a BytesRef that points to the internal content of this builder. Any update to the content of
toBytesRef
Build a new BytesRef that has the same content as this buffer.
append
Append the provided bytes to this builder.
copyBytes
Replace the content of this builder with the provided bytes. Equivalent to calling #clear() and then #append(BytesRef).
bytes
Return a reference to the bytes of this builder.
grow
Ensure that this builder can hold at least capacity bytes without resizing.
length
Return the number of bytes in this buffer.
clear
Reset this builder to the empty state.
copyChars
Replace the content of this buffer with UTF-8 encoded bytes that would represent the provided text.
setLength
Set the length.
setByteAt
Set a byte.

Popular in Java

Making http requests using okhttp
findViewById (Activity)
getExternalFilesDir (Context)
getSharedPreferences (Context)
BufferedWriter (java.io)
Wraps an existing Writer and buffers the output. Expensive interaction with the underlying reader is
File (java.io)
An "abstract" representation of a file system entity identified by a pathname. The pathname may be a
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
FileUtils (org.apache.commons.io)
General file manipulation utilities. Facilities are provided in the following areas: * writing to a
Reference (javax.naming)
CodeWhisperer alternatives

How to use BytesRefBuilder in org.apache.lucene.util

Best Java code snippets using org.apache.lucene.util.BytesRefBuilder (Showing top 20 results out of 315)

Refine search

How to use
BytesRefBuilder
in
org.apache.lucene.util