/**
 * Compares this heuristic with another object.
 *
 * @param other the object to compare against; may be {@code null}
 * @return {@code true} only when {@code other} is a {@code ChiSquare} and the
 *         superclass comparison also reports equality
 */
@Override
public boolean equals(Object other) {
    // instanceof is false for null, so super.equals is only reached for ChiSquare instances.
    return other instanceof ChiSquare && super.equals(other);
}
/**
 * Creates the heuristic instance produced by this factory method: a {@code ChiSquare}.
 *
 * @param includeNegatives     forwarded unchanged to the {@code ChiSquare} constructor
 * @param backgroundIsSuperset forwarded unchanged to the {@code ChiSquare} constructor
 * @return a new {@code ChiSquare} built from the two flags
 */
@Override protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) { return new ChiSquare(includeNegatives, backgroundIsSuperset); } };
/**
 * Creates the heuristic instance produced by this factory method: a {@code MutualInformation}.
 *
 * @param includeNegatives     forwarded unchanged to the {@code MutualInformation} constructor
 * @param backgroundIsSuperset forwarded unchanged to the {@code MutualInformation} constructor
 * @return a new {@code MutualInformation} built from the two flags
 */
@Override protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) { return new MutualInformation(includeNegatives, backgroundIsSuperset); } };
/**
 * Mixes the hash of {@code NAME} with the hash computed by the superclass.
 *
 * @return {@code 31 * NAME.hashCode() + super.hashCode()}, using ordinary
 *         wrapping {@code int} arithmetic
 */
@Override
public int hashCode() {
    return 31 * NAME.hashCode() + super.hashCode();
}
/**
 * Writes this heuristic as an object keyed by {@code NAME}; the object's
 * contents are emitted by the superclass {@code build} method.
 *
 * @param builder the builder to write into
 * @param params  not consulted by this implementation
 * @return the builder that was passed in
 * @throws IOException if writing to the builder fails
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(NAME);
    super.build(builder);
    // endObject() returns the builder it was invoked on, so this hands back `builder`.
    return builder.endObject();
}
}
/**
 * Creates the heuristic instance produced by this factory method: a {@code GND}.
 *
 * @param includeNegatives     ignored here; the {@code GND} constructor that is
 *                             called takes only {@code backgroundIsSuperset}
 * @param backgroundIsSuperset forwarded unchanged to the {@code GND} constructor
 * @return a new {@code GND}
 */
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
    final SignificanceHeuristic gnd = new GND(backgroundIsSuperset);
    return gnd;
}
/** * Indicates the significance of a term in a sample by determining what percentage * of all occurrences of a term are found in the sample. */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { checkFrequencyValidity(subsetFreq, subsetSize, supersetFreq, supersetSize, "PercentageScore"); if (supersetFreq == 0) { // avoid a divide by zero issue return 0; } return (double) subsetFreq / (double) supersetFreq; }
/**
 * Recomputes {@code score} by passing {@code subsetDf}, {@code subsetSize},
 * {@code supersetDf} and {@code supersetSize} to the given heuristic.
 *
 * @param significanceHeuristic the heuristic whose {@code getScore} result is stored
 */
void updateScore(SignificanceHeuristic significanceHeuristic) {
    final double recomputed = significanceHeuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize);
    score = recomputed;
}
/** * Calculates Chi^2 * see "Information Retrieval", Manning et al., Eq. 13.19 */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "ChiSquare"); // here we check if the term appears more often in subset than in background without subset. if (!includeNegatives && frequencies.N11 / frequencies.N_1 < frequencies.N10 / frequencies.N_0) { return Double.NEGATIVE_INFINITY; } return (frequencies.N * Math.pow((frequencies.N11 * frequencies.N00 - frequencies.N01 * frequencies.N10), 2.0) / ((frequencies.N_1) * (frequencies.N1_) * (frequencies.N0_) * (frequencies.N_0))); }
/**
 * Validates the four frequency arguments. The general validity check always
 * runs; the subset-versus-superset consistency checks run only when
 * {@code backgroundIsSuperset} is set.
 *
 * @param subsetFreq        term frequency in the subset
 * @param subsetSize        size of the subset
 * @param supersetFreq      term frequency in the superset
 * @param supersetSize      size of the superset
 * @param scoreFunctionName forwarded to {@code checkFrequencyValidity}
 * @throws IllegalArgumentException if, with {@code backgroundIsSuperset} set,
 *         the subset frequency or size exceeds the superset's, or the
 *         frequency outside the subset exceeds the size outside the subset
 */
protected void checkFrequencies(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize, String scoreFunctionName) {
    checkFrequencyValidity(subsetFreq, subsetSize, supersetFreq, supersetSize, scoreFunctionName);

    // The remaining checks are only applied when the background is a superset of the subset.
    if (!backgroundIsSuperset) {
        return;
    }

    if (subsetFreq > supersetFreq) {
        throw new IllegalArgumentException("subsetFreq > supersetFreq" + SCORE_ERROR_MESSAGE);
    }
    if (subsetSize > supersetSize) {
        throw new IllegalArgumentException("subsetSize > supersetSize" + SCORE_ERROR_MESSAGE);
    }
    // What lies outside the subset cannot contain more term occurrences than it has entries.
    if (supersetFreq - subsetFreq > supersetSize - subsetSize) {
        throw new IllegalArgumentException("supersetFreq - subsetFreq > supersetSize - subsetSize" + SCORE_ERROR_MESSAGE);
    }
}
/**
 * Builds the executable form of a script heuristic: stores the compiled
 * script, creates one {@code LongAccessor} per frequency/size value, and
 * registers those accessors in {@code params} under the reserved keys
 * {@code _subset_freq}, {@code _subset_size}, {@code _superset_freq} and
 * {@code _superset_size}.
 *
 * @param script           the script definition; its parameters are copied into {@code params}
 * @param executableScript the script instance kept for execution
 */
ExecutableScriptHeuristic(Script script, SignificantTermsHeuristicScoreScript executableScript) {
    super(script);
    this.executableScript = executableScript;

    // One holder per value exposed to the script.
    subsetDfHolder = new LongAccessor();
    supersetDfHolder = new LongAccessor();
    subsetSizeHolder = new LongAccessor();
    supersetSizeHolder = new LongAccessor();

    // The script's own parameters are copied first and the reserved keys are written afterwards;
    // with ordinary Map semantics a caller-supplied entry under a reserved key is replaced.
    params.putAll(script.getParams());
    params.put("_subset_freq", subsetDfHolder);
    params.put("_subset_size", subsetSizeHolder);
    params.put("_superset_freq", supersetDfHolder);
    params.put("_superset_size", supersetSizeHolder);
}
/**
 * Builds the {@code SignificantTermsAggregatorFactory} for this builder.
 * The configured significance heuristic is first rewritten against the
 * search context, and the rewritten instance is the one given to the factory.
 *
 * @param context            the search context, used for the rewrite and passed to the factory
 * @param config             the values-source configuration
 * @param parent             the parent factory
 * @param subFactoriesBuilder builder for the sub-aggregation factories
 * @return the new factory
 * @throws IOException declared by the overridden method
 */
@Override
protected ValuesSourceAggregatorFactory<ValuesSource, ?> innerBuild(SearchContext context, ValuesSourceConfig<ValuesSource> config,
        AggregatorFactory<?> parent, Builder subFactoriesBuilder) throws IOException {
    final SignificanceHeuristic rewrittenHeuristic = significanceHeuristic.rewrite(context);
    return new SignificantTermsAggregatorFactory(
            name,
            config,
            includeExclude,
            executionHint,
            filterBuilder,
            bucketCountThresholds,
            rewrittenHeuristic,
            context,
            parent,
            subFactoriesBuilder,
            metaData);
}
/**
 * Writes this heuristic as an object keyed by {@code NAME}; the object's
 * contents are emitted by the superclass {@code build} method.
 *
 * @param builder the builder to write into
 * @param params  not consulted by this implementation
 * @return the builder that was passed in
 * @throws IOException if writing to the builder fails
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(NAME);
    super.build(builder);
    // endObject() returns the builder it was invoked on, so this hands back `builder`.
    return builder.endObject();
}
/**
 * Compares this heuristic with another object.
 *
 * @param other the object to compare against; may be {@code null}
 * @return {@code true} only when {@code other} is a {@code MutualInformation}
 *         and the superclass comparison also reports equality
 */
@Override
public boolean equals(Object other) {
    // instanceof is false for null, so super.equals is only reached for MutualInformation instances.
    return other instanceof MutualInformation && super.equals(other);
}
/**
 * Mixes the hash of {@code NAME} with the hash computed by the superclass.
 *
 * @return {@code 31 * NAME.hashCode() + super.hashCode()}, using ordinary
 *         wrapping {@code int} arithmetic
 */
@Override
public int hashCode() {
    final int nameHash = NAME.hashCode();
    return 31 * nameHash + super.hashCode();
}
/**
 * Writes this heuristic as an object keyed by {@code NAME}; the object's
 * contents are emitted by the superclass {@code build} method.
 *
 * @param builder the builder to write into
 * @param params  not consulted by this implementation
 * @return the builder that was passed in
 * @throws IOException if writing to the builder fails
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(NAME);
    super.build(builder);
    // endObject() returns the builder it was invoked on, so this hands back `builder`.
    return builder.endObject();
}
}
/**
 * Builds the {@code SignificantTextAggregatorFactory} for this builder.
 * The configured significance heuristic is first rewritten against the
 * search context, and the rewritten instance is the one given to the factory.
 *
 * @param context            the search context, used for the rewrite and passed to the factory
 * @param parent             the parent factory
 * @param subFactoriesBuilder builder for the sub-aggregation factories
 * @return the new factory
 * @throws IOException declared by the overridden method
 */
@Override
protected AggregatorFactory<?> doBuild(SearchContext context, AggregatorFactory<?> parent, Builder subFactoriesBuilder)
        throws IOException {
    final SignificanceHeuristic rewrittenHeuristic = significanceHeuristic.rewrite(context);
    return new SignificantTextAggregatorFactory(
            name,
            includeExclude,
            filterBuilder,
            bucketCountThresholds,
            rewrittenHeuristic,
            context,
            parent,
            subFactoriesBuilder,
            fieldName,
            sourceFieldNames,
            filterDuplicateText,
            metaData);
}
/**
 * Writes this heuristic as an object keyed by {@code NAME}; the object's
 * contents are emitted by the superclass {@code build} method.
 *
 * @param builder the builder to write into
 * @param params  not consulted by this implementation
 * @return the builder that was passed in
 * @throws IOException if writing to the builder fails
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(NAME);
    super.build(builder);
    // endObject() returns the builder it was invoked on, so this hands back `builder`.
    return builder.endObject();
}
/**
 * Compares this heuristic with another object.
 *
 * @param other the object to compare against; may be {@code null}
 * @return {@code true} only when {@code other} is a {@code GND} and the
 *         superclass comparison also reports equality
 */
@Override
public boolean equals(Object other) {
    // instanceof is false for null, so super.equals is only reached for GND instances.
    return other instanceof GND && super.equals(other);
}
/**
 * Mixes the hash of {@code NAME} with the hash computed by the superclass.
 *
 * @return {@code 31 * NAME.hashCode() + super.hashCode()}, using ordinary
 *         wrapping {@code int} arithmetic
 */
@Override
public int hashCode() {
    return 31 * NAME.hashCode() + super.hashCode();
}