/**
 * Fetches the term vector of one document's body field and returns its distinct terms.
 *
 * <p>The request targets {@code indexName}/{@code documentType}, is restricted to
 * {@code bodyField}, and has offsets, positions and field statistics switched off.
 * Terms are returned exactly as they appear in the term vector, i.e. after whatever
 * analysis the field applies (the original documentation describes them as stemmed).
 *
 * @param id id of the document whose term vector is requested
 * @return the distinct terms of the body field; empty when the response carries no
 *         terms for that field
 * @throws IOException if the term vector response cannot be read
 */
public Set<String> getTerms(String id) throws IOException {
    StopWatch stopWatch = null;
    if (logger.isDebugEnabled()) {
        stopWatch = new StopWatch();
        stopWatch.start();
    }

    TermVectorsResponse response = client.prepareTermVector(indexName, documentType, id)
            .setOffsets(false)
            .setPositions(false)
            .setFieldStatistics(false)
            .setSelectedFields(this.bodyField)
            .execute()
            .actionGet();

    Set<String> termsSet = new HashSet<>();

    // terms(...) yields null when the response has nothing for the field (for example a
    // missing document or a field without term vectors); treat that as "no terms".
    Terms terms = response.getFields().terms(this.bodyField);
    if (terms != null) {
        TermsEnum iterator = terms.iterator();
        // Drive the loop by next() == null rather than terms.size(): size() is allowed to
        // report -1 when the count is unknown, which would silently skip every term.
        while (iterator.next() != null) {
            termsSet.add(iterator.term().utf8ToString());
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("time spent on getNgrams from doc " + id + " = " + stopWatch
                + " It has " + termsSet.size() + " ngrams");
    }
    return termsSet;
}
/**
 * Requests term vectors for an ad-hoc document built from the given text.
 *
 * <p>The artificial document has a single field, {@code booleanRestrictionFieldName},
 * holding {@code text}. The request is sent to {@code policyIndexName} /
 * {@code percolatorTypeName} with DFS, term statistics, positions and offsets enabled,
 * and field statistics and payloads disabled. The call waits for the response using the
 * search timeout taken from {@code elasticsearchProperties}.
 *
 * @param text value placed in the restriction field of the artificial document
 * @return the term vectors response for that document
 * @throws IOException if building the JSON document fails
 */
private TermVectorsResponse getTermVectors(String text) throws IOException {
    final TermVectorsResponse termVectors = getElasticClient()
            .prepareTermVectors()
            // where the artificial document is analysed
            .setIndex(policyIndexName)
            .setType(percolatorTypeName)
            // what to include in the response
            .setDfs(true)
            .setTermStatistics(true)
            .setPositions(true)
            .setOffsets(true)
            // what to leave out
            .setFieldStatistics(false)
            .setPayloads(false)
            // the document itself: { <booleanRestrictionFieldName>: text }
            .setDoc(
                    jsonBuilder()
                            .startObject()
                            .field(booleanRestrictionFieldName, text)
                            .endObject())
            .get(elasticsearchProperties.getElasticsearchSearchTimeout());
    return termVectors;
}
private Map<Integer,String> getTermVectorWithException(String field, String id) throws IOException { TermVectorsResponse response = client.prepareTermVector(indexName, documentType, id) .setOffsets(false).setPositions(true).setFieldStatistics(false) .setTermStatistics(false) .setSelectedFields(field).
.setOffsets(false).setPositions(false).setFieldStatistics(false) .setTermStatistics(true) .setSelectedFields(field).