/**
 * Creates an evaluator from the supplied search parameters.
 * <p>
 * The given {@code maxScore} is not stored as-is: it is first passed through
 * {@link TermWeightPosition#positionScoreToTermWeightScore} and the converted value is kept.
 * Once every parameter has been stored, {@code canProcess} is set to the result of
 * {@code initialize()}.
 *
 * @param fields
 *            the set of field names stored on this evaluator
 * @param distance
 *            the distance value stored on this evaluator
 * @param maxScore
 *            the maximum score, expressed as a position score; converted before being stored
 * @param termOffsetMap
 *            map from a string key to its {@code TermFrequencyList} (presumably keyed by term; confirm against callers)
 * @param terms
 *            the terms to evaluate; the array is stored by reference, not copied
 */
public ContentFunctionEvaluator(Set<String> fields, int distance, float maxScore, Map<String,TermFrequencyList> termOffsetMap, String... terms) {
    // Plain pass-through state.
    this.terms = terms;
    this.termOffsetMap = termOffsetMap;
    this.fields = fields;
    this.distance = distance;

    // The score is held in term-weight form rather than as the raw position score.
    this.maxScore = TermWeightPosition.positionScoreToTermWeightScore(maxScore);

    // Must run last so that initialize() sees the fully populated state.
    this.canProcess = initialize();
}
/**
 * Default maximum score, in term-weight score form.
 * <p>
 * Computed once at class initialization by converting a position score of
 * {@link Float#NEGATIVE_INFINITY} with {@link TermWeightPosition#positionScoreToTermWeightScore}.
 * NOTE(review): presumably this represents "no score limit"; confirm against the converter's contract.
 */
protected static final int DEFAULT_MAX_SCORE = TermWeightPosition.positionScoreToTermWeightScore(Float.NEGATIVE_INFINITY);
public Multimap<BulkIngestKey,Value> processContent(final RawRecordContainer event, Multimap<String,NormalizedContentInterface> eventFields, StatusReporter reporter) { BufferedReader content = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(event.getRawData()))); Multimap<BulkIngestKey,Value> values = HashMultimap.create(); // Process test file // CSV file => position,skips,term,score\n content.lines().forEach( line -> { String[] parts = line.split(","); TermWeight.Info info = TermWeight.Info.newBuilder().addTermOffset(Integer.parseInt(parts[0])) .addPrevSkips(Integer.parseInt(parts[1])) .addScore(TermWeightPosition.positionScoreToTermWeightScore(Float.parseFloat(parts[3]))) .setZeroOffsetMatch(true).build(); NormalizedFieldAndValue nfv = new NormalizedFieldAndValue("BODY", parts[2]); getShardFIKey(nfv, event, values); getShardIndexFIKey(nfv, event, values); getTFKey(nfv, event, values, info); }); return values; }
/**
 * Builds a {@link TermWeightPosition} from the given offset, previous-skip count and score.
 * <p>
 * The score is supplied as a position score and is converted with
 * {@link TermWeightPosition#positionScoreToTermWeightScore} before being set on the builder.
 *
 * @param offset
 *            the offset to set on the position
 * @param prevSkips
 *            the previous-skips value to set on the position
 * @param score
 *            the position score; converted to term-weight score form before use
 * @return the built position
 */
private TermWeightPosition getPosition(int offset, int prevSkips, float score) {
    TermWeightPosition.Builder positionBuilder = new TermWeightPosition.Builder();
    // Keep the fluent chain intact; only the builder instance is named.
    return positionBuilder.setOffset(offset)
                    .setPrevSkips(prevSkips)
                    .setScore(TermWeightPosition.positionScoreToTermWeightScore(score))
                    .build();
}