List<Integer> offsets, byte[] visibility) throws IOException, InterruptedException { TermWeight.Info.Builder builder = TermWeight.Info.newBuilder(); for (Integer offset : offsets) { builder.addTermOffset(offset); Value value = new Value(builder.build().toByteArray());
/**
 * Builds a {@code TermWeight.Info} message from this builder's current state without
 * checking required fields (standard protobuf-generated {@code buildPartial()} contract).
 *
 * For each repeated field (termOffset, prevSkips, score) the builder tracks a
 * "list is owned by the builder" bit in {@code bitField0_}. On build, the list is
 * frozen with {@code Collections.unmodifiableList} and the bit is cleared, so a
 * later mutation through this builder allocates a fresh list rather than altering
 * the message just produced.
 */
public datawave.ingest.protobuf.TermWeight.Info buildPartial() {
    datawave.ingest.protobuf.TermWeight.Info result = new datawave.ingest.protobuf.TermWeight.Info(this);
    // Snapshot the builder's presence bits; to_bitField0_ accumulates the
    // message's presence bits (only optional scalar fields contribute).
    int from_bitField0_ = bitField0_;
    int to_bitField0_ = 0;
    // Bit 0x1: termOffset list is still builder-owned — freeze and hand off.
    if (((bitField0_ & 0x00000001) == 0x00000001)) {
        termOffset_ = java.util.Collections.unmodifiableList(termOffset_);
        bitField0_ = (bitField0_ & ~0x00000001);
    }
    result.termOffset_ = termOffset_;
    // Bit 0x2: prevSkips list — same freeze-and-hand-off dance.
    if (((bitField0_ & 0x00000002) == 0x00000002)) {
        prevSkips_ = java.util.Collections.unmodifiableList(prevSkips_);
        bitField0_ = (bitField0_ & ~0x00000002);
    }
    result.prevSkips_ = prevSkips_;
    // Bit 0x4: score list — same freeze-and-hand-off dance.
    if (((bitField0_ & 0x00000004) == 0x00000004)) {
        score_ = java.util.Collections.unmodifiableList(score_);
        bitField0_ = (bitField0_ & ~0x00000004);
    }
    result.score_ = score_;
    // Bit 0x8 in the builder maps to bit 0x1 in the message: zeroOffsetMatch "has" flag.
    if (((from_bitField0_ & 0x00000008) == 0x00000008)) {
        to_bitField0_ |= 0x00000001;
    }
    result.zeroOffsetMatch_ = zeroOffsetMatch_;
    result.bitField0_ = to_bitField0_;
    onBuilt();
    return result;
}
public Builder mergeFrom(datawave.ingest.protobuf.TermWeight.Info other) { if (other == datawave.ingest.protobuf.TermWeight.Info.getDefaultInstance()) return this; if (!other.termOffset_.isEmpty()) { if (other.hasZeroOffsetMatch()) { setZeroOffsetMatch(other.getZeroOffsetMatch()); this.mergeUnknownFields(other.getUnknownFields()); return this;
TermWeight.Info twInfo = TermWeight.Info.parseFrom(tfSource.getTopValue().get()); for (int i = 0; i < twInfo.getTermOffsetCount(); i++) { position.setTermWeightOffsetInfo(twInfo, i); offsets.put(twZone, position.build());
size += 1 * getTermOffsetList().size(); size += 1 * getPrevSkipsList().size(); size += 1 * getScoreList().size(); size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size;
List<Integer> offsets, byte[] visibility, boolean deleteMode) throws IOException, InterruptedException { TermWeight.Info.Builder builder = TermWeight.Info.newBuilder(); for (Integer offset : offsets) { builder.addTermOffset(offset); Value value = new Value(builder.build().toByteArray());
public Multimap<BulkIngestKey,Value> processContent(final RawRecordContainer event, Multimap<String,NormalizedContentInterface> eventFields, StatusReporter reporter) { BufferedReader content = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(event.getRawData()))); Multimap<BulkIngestKey,Value> values = HashMultimap.create(); // Process test file // CSV file => position,skips,term,score\n content.lines().forEach( line -> { String[] parts = line.split(","); TermWeight.Info info = TermWeight.Info.newBuilder().addTermOffset(Integer.parseInt(parts[0])) .addPrevSkips(Integer.parseInt(parts[1])) .addScore(TermWeightPosition.positionScoreToTermWeightScore(Float.parseFloat(parts[3]))) .setZeroOffsetMatch(true).build(); NormalizedFieldAndValue nfv = new NormalizedFieldAndValue("BODY", parts[2]); getShardFIKey(nfv, event, values); getShardIndexFIKey(nfv, event, values); getTFKey(nfv, event, values, info); }); return values; }
@Override public void collect(Value value) { // Make sure we don't aggregate something else if (value == null || value.get().length == 0) { return; } TermWeight.Info info; try { info = TermWeight.Info.parseFrom(value.get()); } catch (InvalidProtocolBufferException e) { log.error("Value passed to aggregator was not of type TermWeight.Info", e); return; } // Add each offset into the list maintaining sorted order TermWeightPosition.Builder builder = new TermWeightPosition.Builder(); for (int i = 0; i < info.getTermOffsetCount(); i++) { builder.setTermWeightOffsetInfo(info, i); offsets.add(builder.build()); builder.reset(); } }
@Override public Value aggregate() { for (TermWeightPosition offset : offsets) { builder.addTermOffset(offset.getOffset()); if (0 <= offset.getPrevSkips()) { builder.addPrevSkips(offset.getPrevSkips()); } if (0 <= offset.getScore()) { builder.addScore(offset.getScore()); } // If the zeroOffset has been set and the termweight is still default(true) if (builder.getZeroOffsetMatch()) { builder.setZeroOffsetMatch(offset.getZeroOffsetMatch()); } } return new Value(builder.build().toByteArray()); }
/** Creates a new {@code Builder} pre-populated with the fields of {@code prototype}. */
public static Builder newBuilder(datawave.ingest.protobuf.TermWeight.Info prototype) {
    Builder builder = newBuilder();
    return builder.mergeFrom(prototype);
}
/** Returns a new {@code Builder} initialized from this message's current field values. */
public Builder toBuilder() {
    // Delegate to the prototype-based factory so copy semantics live in one place.
    return newBuilder(this);
}
/** Returns a fresh, empty {@code Builder} for this message type. */
public Builder newBuilderForType() {
    // Delegate to the static factory; no per-instance state is needed.
    return newBuilder();
}
public Builder setTermWeightOffsetInfo(TermWeight.Info info, int i) { setOffset(info.getTermOffset(i)); // Only pull the previous skips if the counts match the offsets // offsets, skips, and scores are linked by index so array lengths must match if (info.getTermOffsetCount() == info.getPrevSkipsCount()) { setPrevSkips(info.getPrevSkips(i)); } // Only pull the scores if the counts match the offsets if (info.getTermOffsetCount() == info.getScoreCount()) { setScore(info.getScore(i)); } setZeroOffsetMatch(info.getZeroOffsetMatch()); return this; }
/** Returns the shared immutable default instance of {@code TermWeight.Info}. */
public datawave.ingest.protobuf.TermWeight.Info getDefaultInstanceForType() {
    // The default instance is a singleton; no state from `this` is consulted.
    return datawave.ingest.protobuf.TermWeight.Info.getDefaultInstance();
}
/**
 * Builds the term-frequency entry for the given field/value and adds it to {@code values}.
 * The column qualifier layout is {@code dataType\0uid\0fieldValue\0fieldName}.
 *
 * @param nfv the normalized field name/value pair
 * @param event the source event (supplies visibility and timestamp)
 * @param values the multimap the new bulk-ingest entry is added to
 * @param info the serialized term-weight payload for this entry
 */
private void getTFKey(final NormalizedFieldAndValue nfv, final RawRecordContainer event, final Multimap values, final TermWeight.Info info) {
    byte[] fieldVisibility = getVisibility(event, nfv);

    // Presize for the four components plus three NUL separators.
    int capacity = this.eventDataTypeName.length() + this.eventUid.length()
                    + nfv.getIndexedFieldName().length() + nfv.getIndexedFieldValue().length() + 3;
    StringBuilder colq = new StringBuilder(capacity);
    colq.append(this.eventDataTypeName).append('\u0000')
        .append(this.eventUid).append('\u0000')
        .append(nfv.getIndexedFieldValue()).append('\u0000')
        .append(nfv.getIndexedFieldName());

    Key tfKey = new Key(shardId, ExtendedDataTypeHandler.TERM_FREQUENCY_COLUMN_FAMILY.getBytes(), colq.toString().getBytes(), fieldVisibility,
                    event.getDate(), helper.getDeleteMode());
    BulkIngestKey bKey = new BulkIngestKey(new Text(this.getShardTableName()), tfKey);
    values.put(bKey, new Value(info.toByteArray()));
}
/** Clears all accumulated positions and starts a fresh {@code TermWeight.Info} builder. */
@Override
public void reset() {
    offsets.clear();
    builder = TermWeight.Info.newBuilder();
}