/**
 * Orders positions by their low offset first; ties are broken by the offset.
 *
 * @param o
 *            the position to compare against
 * @return a negative, zero, or positive value as this position sorts before, equal to, or after {@code o}
 */
@Override
public int compareTo(TermWeightPosition o) {
    final int byLowOffset = Integer.compare(getLowOffset(), o.getLowOffset());
    // Only fall through to the offset when the low offsets are identical
    return (byLowOffset == 0) ? Integer.compare(getOffset(), o.getOffset()) : byLowOffset;
}
/**
 * Type-specific equality: two positions are equal when {@link #compareTo(TermWeightPosition)} reports no ordering difference between them.
 *
 * @param o
 *            the position to compare against, may be {@code null}
 * @return {@code true} if {@code o} is non-null and compares as equal to this position, {@code false} otherwise
 */
public boolean equals(TermWeightPosition o) {
    // A null argument must yield false rather than a NullPointerException from compareTo
    return o != null && compareTo(o) == 0;
}
/**
 * Generic equality: delegates to {@code equals(TermWeightPosition)} when the argument is a {@code TermWeightPosition}.
 *
 * @param o
 *            the object to compare against
 * @return {@code false} for any object that is not a {@code TermWeightPosition} (including {@code null}), otherwise the result of the typed comparison
 */
@Override
public boolean equals(Object o) {
    return (o instanceof TermWeightPosition) && this.equals((TermWeightPosition) o);
}
/**
 * Checks whether this position and the other one share the same offset while at least one of them disallows zero-offset matches.
 *
 * @param o
 *            Other position
 * @return {@code true} if at least one side has its zero-offset-match flag unset and both offsets are equal; {@code false} if both sides allow zero-offset
 *         matches or the offsets differ
 */
public boolean isZeroOffset(EvaluateTermPosition o) {
    // Both sides permit zero-offset matches, so there is nothing to reject
    if (this.termWeightPosition.getZeroOffsetMatch() && o.termWeightPosition.getZeroOffsetMatch()) {
        return false;
    }

    // Different offsets can never be a zero-offset collision
    if (this.termWeightPosition.getOffset() != o.termWeightPosition.getOffset()) {
        return false;
    }

    if (log.isTraceEnabled()) {
        log.trace("EvaluateTermPosition.isZeroOffset: " + this.termWeightPosition.getOffset() + " == " + o.termWeightPosition.getOffset());
    }
    return true;
}
@Override public Value aggregate() { for (TermWeightPosition offset : offsets) { builder.addTermOffset(offset.getOffset()); if (0 <= offset.getPrevSkips()) { builder.addPrevSkips(offset.getPrevSkips()); } if (0 <= offset.getScore()) { builder.addScore(offset.getScore()); } // If the zeroOffset has been set and the termweight is still default(true) if (builder.getZeroOffsetMatch()) { builder.setZeroOffsetMatch(offset.getZeroOffsetMatch()); } } return new Value(builder.build().toByteArray()); }
OffsetList o = offsetQueue.remove(); if (maxOffset.get().getLowOffset() - o.getMinOffset().getOffset() <= distance) { return true; if (maxOffset.get().getLowOffset() - o.getMaxOffset().getOffset() > distance) { return false; if (nextOffset.get().compareTo(maxOffset.get()) > 0) { maxOffset = nextOffset;
int maxSkip = 0; for (TermWeightPosition twp : offsets.get(i)) { if (twp.getPrevSkips() > maxSkip) { maxSkip = twp.getPrevSkips(); if (twp.getScore() > maxScore) { if (log.isTraceEnabled()) { log.trace("[" + terms[i] + "] Skip score => " + twp);
/**
 * Creates an evaluator over the supplied term offsets.
 * <p>
 * The position-style {@code maxScore} is converted with {@link TermWeightPosition#positionScoreToTermWeightScore} before it is stored. {@code initialize()}
 * is invoked last, after every other field has been assigned, and its result is kept in {@code canProcess}.
 *
 * @param fields
 *            the fields stored on this evaluator
 * @param distance
 *            the distance stored on this evaluator
 * @param maxScore
 *            the maximum score, expressed as a position score
 * @param termOffsetMap
 *            map from a string key to its {@code TermFrequencyList}
 * @param terms
 *            the terms stored on this evaluator
 */
public ContentFunctionEvaluator(Set<String> fields, int distance, float maxScore, Map<String,TermFrequencyList> termOffsetMap, String... terms) {
    this.terms = terms;
    this.termOffsetMap = termOffsetMap;
    this.fields = fields;
    this.distance = distance;
    this.maxScore = TermWeightPosition.positionScoreToTermWeightScore(maxScore);

    // Must stay last: runs once all of the state above is in place
    this.canProcess = initialize();
}
private void filterOffsets(List<List<TermWeightPosition>> offsets) { // if max score is maximum possible value short circuit if (maxScore == DEFAULT_MAX_SCORE) { return; } for (List<TermWeightPosition> offset : offsets) { Iterator<TermWeightPosition> twpIter = offset.iterator(); while (twpIter.hasNext()) { Integer score = twpIter.next().getScore(); if (null == score || score > maxScore) { twpIter.remove(); } } } }
/**
 * Returns a new {@code TermWeightPosition} constructed from this instance.
 * <p>
 * The copy is produced by {@code new TermWeightPosition(this)} rather than by {@code super.clone()}.
 *
 * @return a new position built from this one
 */
@Override
public TermWeightPosition clone() {
    return new TermWeightPosition(this);
}
TermWeightPosition first = list.get(0); if (first.compareTo(maxFirstOffset) > 0) { maxFirstTermIndex = i; maxFirstOffset = first; if (last.compareTo(minLastOffset) <= 0) { minLastTermIndex = i; minLastOffset = last; int maxFirstTWP = maxFirstOffset.getLowOffset() - (maxFirstTermIndex * distance); for (int i = maxFirstTermIndex; i >= 0; i--) { maxFirstTWP -= maxSkips[i]; int minLastTWP = minLastOffset.getOffset() + (terms.length - minLastTermIndex - 1) * distance; for (int i = minLastTermIndex; i < maxSkips.length; i++) { minLastTWP += maxSkips[i];
/**
 * Term-weight score obtained by converting {@link Float#NEGATIVE_INFINITY} with {@link TermWeightPosition#positionScoreToTermWeightScore}.
 * <p>
 * NOTE(review): presumably this is the least restrictive max score, i.e. the value meaning "do not filter by score" - confirm against the conversion
 * function.
 */
protected static final int DEFAULT_MAX_SCORE = TermWeightPosition.positionScoreToTermWeightScore(Float.NEGATIVE_INFINITY);
/**
 * Creates a new {@code TermWeightPosition} by passing this object to the {@code TermWeightPosition} constructor.
 * <p>
 * NOTE(review): presumably this lives on the builder and the constructor copies the builder's current settings - confirm.
 *
 * @return the newly constructed position
 */
public TermWeightPosition build() { return new TermWeightPosition(this); }
switch (direction) { case FORWARD: low = termWeightPosition.getLowOffset(); high = termWeightPosition.getOffset() + distance; eval = o; break; case REVERSE: low = o.termWeightPosition.getLowOffset(); high = o.termWeightPosition.getOffset() + distance; eval = this; break; log.trace("EvaluateTermPosition.isWithIn: " + low + "<=" + eval.termWeightPosition.getOffset() + " && " + eval.termWeightPosition.getLowOffset() + "<=" + high); return (low <= eval.termWeightPosition.getOffset() && eval.termWeightPosition.getLowOffset() <= high);
public Multimap<BulkIngestKey,Value> processContent(final RawRecordContainer event, Multimap<String,NormalizedContentInterface> eventFields, StatusReporter reporter) { BufferedReader content = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(event.getRawData()))); Multimap<BulkIngestKey,Value> values = HashMultimap.create(); // Process test file // CSV file => position,skips,term,score\n content.lines().forEach( line -> { String[] parts = line.split(","); TermWeight.Info info = TermWeight.Info.newBuilder().addTermOffset(Integer.parseInt(parts[0])) .addPrevSkips(Integer.parseInt(parts[1])) .addScore(TermWeightPosition.positionScoreToTermWeightScore(Float.parseFloat(parts[3]))) .setZeroOffsetMatch(true).build(); NormalizedFieldAndValue nfv = new NormalizedFieldAndValue("BODY", parts[2]); getShardFIKey(nfv, event, values); getShardIndexFIKey(nfv, event, values); getTFKey(nfv, event, values, info); }); return values; }
@Override public int compareTo(EvaluateTermPosition o) { int result = termWeightPosition.compareTo(o.termWeightPosition); if (result != 0) { return result; } // Reverse the phrase index so you hit the other phrases before hitting current phrase index // This helps with end match scenarios result = Integer.compare(o.phraseIndex, phraseIndex); if (result != 0) { return result; } return term.compareTo(o.term); }
if (root.isSameTerm(skip) && root.termWeightPosition.equals(skip.termWeightPosition)) { continue;
/**
 * Builds a {@code TermWeightPosition} from an offset, a prev-skips count and a position score.
 *
 * @param offset
 *            the offset to set on the builder
 * @param prevSkips
 *            the prev-skips value to set on the builder
 * @param score
 *            the position score; converted with {@link TermWeightPosition#positionScoreToTermWeightScore} before being set
 * @return the built position
 */
private TermWeightPosition getPosition(int offset, int prevSkips, float score) {
    final int termWeightScore = TermWeightPosition.positionScoreToTermWeightScore(score);
    return new TermWeightPosition.Builder().setOffset(offset).setPrevSkips(prevSkips).setScore(termWeightScore).build();
}
/**
 * Orders offset lists by comparing their minimum offsets.
 *
 * @param o
 *            the list to compare against
 * @return the result of comparing this list's minimum offset with that of {@code o}
 */
@Override
public int compareTo(OffsetList o) {
    final int order = this.getMinOffset().compareTo(o.getMinOffset());
    return order;
}