/**
 * Removes a field from the document, either entirely or for one specific value.
 *
 * <p>When {@code value} is {@code null}, the call simply delegates to
 * {@code removeFieldFromDocument( document, key )}. Otherwise the values currently stored
 * under {@code exactKey} are collected into a set and the string form of {@code value} is
 * taken out of it. If that string was not among them, the document is left untouched. If it
 * was, the field {@code key} is removed and every remaining value is added back under
 * {@code key}.
 *
 * @param document the document to modify
 * @param key the field name that is removed and re-populated
 * @param exactKey the field name whose current values are read
 * @param value the value to remove, compared by its {@code toString()} form; may be {@code null}
 */
void removeFieldsFromDocument( Document document, String key, String exactKey, Object value )
{
    if ( value == null )
    {
        removeFieldFromDocument( document, key );
        return;
    }

    String target = value.toString();
    Set<String> survivors = new HashSet<>( Arrays.asList( document.getValues( exactKey ) ) );
    boolean wasPresent = survivors.remove( target );
    if ( !wasPresent )
    {
        // Nothing to remove, so the document stays as it is.
        return;
    }

    removeFieldFromDocument( document, key );
    for ( String survivor : survivors )
    {
        addNewFieldToDocument( document, key, survivor );
    }
}
/**
 * Searches using a document that is expected to be present in the index.
 *
 * <p>The first value of the document's identifier field is looked up with a term query;
 * the index-internal number of the first match is then handed to {@code search(int)}.
 *
 * @param doc the query document; its first identifier value is used for the lookup
 * @param reader the index reader to search in
 * @return the result of {@code search(int)} for the matching entry, or {@code null} if the
 *         term query finds nothing
 * @throws IOException if the index lookup fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    String identifier = doc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    Term identifierTerm = new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier);
    TopDocs matches = searcher.search(new TermQuery(identifierTerm), 1);
    if (matches.totalHits <= 0) {
        return null;
    }
    return search(matches.scoreDocs[0].doc);
}
/**
 * Resolves the given document to its entry in the index and searches from there.
 *
 * <p>A single-hit term query on the identifier field (using the document's first identifier
 * value) locates the entry; its document number is passed on to {@code search(int)}.
 *
 * @param doc the query document carrying at least one identifier value
 * @param reader the index reader used for the lookup
 * @return whatever {@code search(int)} returns for the located entry, or {@code null} when
 *         no entry matches the identifier
 * @throws IOException if reading the index fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    TermQuery byIdentifier = new TermQuery(
            new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    doc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
    TopDocs found = indexSearcher.search(byIdentifier, 1);
    return found.totalHits > 0 ? search(found.scoreDocs[0].doc) : null;
}
/**
 * Searches with the feature and hashes stored in the given document.
 *
 * <p>A new feature instance of the same class as {@code feature} is created and filled from
 * the binary value of {@code featureFieldName}. The first value of {@code hashesFieldName}
 * is split on single spaces and passed, together with the feature, to the hash-based
 * {@code search} overload.
 *
 * @param doc the query document holding the feature bytes and the hash string
 * @param reader the index reader to search in
 * @return the hits of the delegated search, or {@code null} if any exception occurs
 *         (the exception is printed to standard error, not rethrown)
 * @throws IOException declared by the signature; exceptions raised inside are caught here
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    try {
        GlobalFeature probe = feature.getClass().newInstance();
        probe.setByteArrayRepresentation(
                doc.getBinaryValue(featureFieldName).bytes,
                doc.getBinaryValue(featureFieldName).offset,
                doc.getBinaryValue(featureFieldName).length);
        String[] hashTokens = doc.getValues(hashesFieldName)[0].split(" ");
        return search(hashTokens, probe, reader);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException { try { GlobalFeature queryFeature = feature.getClass().newInstance(); queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length); return search(doc.getValues(hashesFieldName)[0].split(" "), queryFeature, reader); // return search(doc.getValues(hashesFieldName + "_q")[0].split(" "), queryFeature, reader); // just for debug if a query feature is stored in the index. } catch (Exception e) { e.printStackTrace(); } return null; }
/**
 * Uses the stored feature and hash string of {@code doc} as the query.
 *
 * <p>Steps: instantiate a feature of the same class as {@code feature}, load it from the
 * binary field {@code featureFieldName}, split the first {@code hashesFieldName} value on
 * spaces, and delegate to the {@code search} overload taking hashes, feature and reader.
 *
 * @param doc the query document
 * @param reader the index reader to search in
 * @return the delegated search result; {@code null} if anything in the process throws
 *         (the stack trace is printed and the exception is not propagated)
 * @throws IOException declared by the signature; exceptions raised inside are caught here
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    try {
        GlobalFeature fromDocument = feature.getClass().newInstance();
        fromDocument.setByteArrayRepresentation(
                doc.getBinaryValue(featureFieldName).bytes,
                doc.getBinaryValue(featureFieldName).offset,
                doc.getBinaryValue(featureFieldName).length);
        String[] storedHashes = doc.getValues(hashesFieldName)[0].split(" ");
        return search(storedHashes, fromDocument, reader);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
BufferedImage tmp = ImageIO.read(new FileInputStream(reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
BufferedImage tmp = ImageIO.read(new FileInputStream(reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
/**
 * Puts results into a HTML file.
 *
 * <p>The file is named {@code results-<prefix>-<seconds since epoch>.html}. It shows the
 * query image first, followed by one line per hit containing the hit's score and the image
 * referenced by the first identifier value of the hit's stored document. All images are
 * linked through {@code file://} URLs.
 *
 * @param prefix text inserted into the generated file name
 * @param hits the search results to render
 * @param queryImage path of the query image, used as-is inside a {@code file://} URL
 * @param reader index reader used to load the stored document of each hit
 * @return the name of the HTML file that was written
 * @throws IOException if the file cannot be written or a document cannot be read
 */
public static String saveImageResultsToHtml(String prefix, ImageSearchHits hits, String queryImage, IndexReader reader) throws IOException {
    long l = System.currentTimeMillis() / 1000;
    String fileName = "results-" + prefix + "-" + l + ".html";
    // try-with-resources: the writer is closed even if a write or document lookup fails.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName))) {
        bw.write("<html>\n" +
                "<head><title>Search Results</title></head>\n" +
                "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<h3>query</h3>\n");
        bw.write("<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
        bw.write("<h3>results</h3>\n");
        for (int i = 0; i < hits.length(); i++) {
            // Load the stored document once per hit; the path is needed for both href and src.
            String imagePath = reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            bw.write(hits.score(i) + " - <a href=\"file://" + imagePath + "\"><img src=\"file://" + imagePath + "\"></a><p>\n");
        }
        bw.write("</body>\n" +
                "</html>");
    }
    return fileName;
}
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException { try { GlobalFeature queryFeature = feature.getClass().newInstance(); queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length); return search(doc.getValues(hashesFieldName)[0].split(" "), queryFeature, reader); // return search(doc.getValues(hashesFieldName + "_q")[0].split(" "), queryFeature, reader); // just for debug if a query feature is stored in the index. } catch (Exception e) { e.printStackTrace(); } return null; }
/**
 * Puts results into a HTML file.
 *
 * <p>The output file is called {@code results-<prefix>-<seconds since epoch>.html}. The
 * page starts with the query image and then lists every hit as its score plus a linked
 * image, where the image location is the first identifier value of the hit's stored
 * document. Query and result images are referenced with {@code file://} URLs.
 *
 * @param prefix text that becomes part of the generated file name
 * @param hits the search results to write out
 * @param queryImage path of the query image, inserted unchanged into a {@code file://} URL
 * @param reader index reader from which the stored document of each hit is loaded
 * @return the name of the generated HTML file
 * @throws IOException if writing the file or reading a document fails
 */
public static String saveImageResultsToHtml(String prefix, ImageSearchHits hits, String queryImage, IndexReader reader) throws IOException {
    long l = System.currentTimeMillis() / 1000;
    String fileName = "results-" + prefix + "-" + l + ".html";
    // The writer is managed by try-with-resources so a failure mid-way does not leak it.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName))) {
        bw.write("<html>\n" +
                "<head><title>Search Results</title></head>\n" +
                "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<h3>query</h3>\n");
        bw.write("<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
        bw.write("<h3>results</h3>\n");
        for (int i = 0; i < hits.length(); i++) {
            // One document lookup per hit instead of two; the same path fills href and src.
            String imagePath = reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            bw.write(hits.score(i) + " - <a href=\"file://" + imagePath + "\"><img src=\"file://" + imagePath + "\"></a><p>\n");
        }
        bw.write("</body>\n" +
                "</html>");
    }
    return fileName;
}
/**
 * Renders a document as an RSS {@code <item>} element.
 *
 * <p>The element contains, in this order: {@code guid} (the source, with attribute
 * {@code isPermalink="true"} passed to {@code emitTag}), {@code title}, {@code link}
 * (the source again), {@code author} (the creator), one {@code category} per subject
 * value, {@code pubDate} (the creation date parsed with {@code ISO8601.parse} and formatted
 * with {@code rssDateFormat}) and {@code description} (the title again).
 *
 * @param doc the document providing the metadata fields
 * @return the concatenated markup, wrapped in {@code <item>} and {@code </item>}
 */
public String getRSSItem(Document doc) {
    StringBuilder item = new StringBuilder("<item>");

    item.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()), "isPermalink", "true"))
        .append(emitTag("title", doc.get(TikaCoreProperties.TITLE.getName()), null, null))
        .append(emitTag("link", doc.get(DublinCore.SOURCE.getName()), null, null))
        .append(emitTag("author", doc.get(TikaCoreProperties.CREATOR.getName()), null, null));

    String[] topics = doc.getValues(TikaCoreProperties.SUBJECT.getName());
    for (int t = 0; t < topics.length; t++) {
        item.append(emitTag("category", topics[t], null, null));
    }

    String created = doc.get(TikaCoreProperties.CREATED.getName());
    item.append(emitTag("pubDate", rssDateFormat.format(ISO8601.parse(created)), null, null))
        .append(emitTag("description", doc.get(TikaCoreProperties.TITLE.getName()), null, null))
        .append("</item>");

    return item.toString();
}
/**
 * Searches by parsing the first value of {@code fieldName} as a text query.
 *
 * <p>The query runs with the configured {@code similarity} and returns at most
 * {@code numMaxHits} results. Each score is converted to a distance as
 * {@code 1 / score}; the largest distance seen is passed to the hits container as the
 * maximum distance.
 *
 * @param doc the query document; the first value of {@code fieldName} is the query string
 * @param reader the index reader to search in
 * @return the hits, or {@code null} if the query string cannot be parsed (the query string
 *         and the stack trace are printed to standard error in that case)
 * @throws IOException if the index search fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(similarity);
    String queryString = doc.getValues(fieldName)[0];
    try {
        Query parsed = qp.parse(queryString);
        TopDocs top = searcher.search(parsed, numMaxHits);
        LinkedList<SimpleResult> results = new LinkedList<SimpleResult>();
        double largestDistance = 0d;
        int found = top.scoreDocs.length;
        for (int rank = 0; rank < found; rank++) {
            // Higher score means closer match, so the distance is the reciprocal.
            double distance = 1d / top.scoreDocs[rank].score;
            largestDistance = Math.max(distance, largestDistance);
            results.add(new SimpleResult(distance, top.scoreDocs[rank].doc));
        }
        return new SimpleImageSearchHits(results, largestDistance);
    } catch (ParseException e) {
        System.err.println(queryString);
        e.printStackTrace();
        return null;
    }
}
/**
 * Runs a parsed text query built from the document's first {@code fieldName} value.
 *
 * <p>The searcher uses the configured {@code similarity} and is limited to
 * {@code numMaxHits} results. For every result the distance {@code 1 / score} is stored,
 * and the maximum of those distances is handed to the returned hits object.
 *
 * @param doc the query document supplying the query string
 * @param reader the index reader to search in
 * @return the collected hits, or {@code null} when parsing the query string fails (the
 *         string and the exception are printed to standard error)
 * @throws IOException if searching the index fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    indexSearcher.setSimilarity(similarity);
    String queryString = doc.getValues(fieldName)[0];

    TopDocs topDocs;
    try {
        topDocs = indexSearcher.search(qp.parse(queryString), numMaxHits);
    } catch (ParseException e) {
        System.err.println(queryString);
        e.printStackTrace();
        return null;
    }

    LinkedList<SimpleResult> collected = new LinkedList<SimpleResult>();
    double maxDistance = 0d;
    for (int pos = 0; pos < topDocs.scoreDocs.length; pos++) {
        // Reciprocal of the score serves as the distance of this result.
        double dist = 1d / topDocs.scoreDocs[pos].score;
        maxDistance = Math.max(dist, maxDistance);
        collected.add(new SimpleResult(dist, topDocs.scoreDocs[pos].doc));
    }
    return new SimpleImageSearchHits(collected, maxDistance);
}
public void testSearchDocValues() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexNameDV))); GenericDocValuesImageSearcher is = new GenericDocValuesImageSearcher(10, CEDD.class, reader); // run search StopWatch sm = new StopWatch(); BufferedImage qImage = ImageIO.read(new FileInputStream(queryImage)); ImageSearchHits hits = null; for (int i = 0; i<numRuns; i++) { sm.start(); hits = is.search(qImage, reader); sm.stop(); } // print results for (int i = 0; i < hits.length(); i++) { String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns); }
public void testSearchCaching() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName))); GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, true, reader); // run search StopWatch sm = new StopWatch(); BufferedImage qImage = ImageIO.read(new FileInputStream(queryImage)); ImageSearchHits hits = null; for (int i = 0; i<numRuns; i++) { sm.start(); hits = is.search(qImage, reader); sm.stop(); } // print results for (int i = 0; i < hits.length(); i++) { String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns); }
protected void init() { // put all respective features into an in-memory cache ... if (isCaching && reader != null) { Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); featureCache = new LinkedHashMap<Integer, SearchItemForEvaluation>(docs); try { Document d; for (int i = 0; i < docs; i++) { if (!(reader.hasDeletions() && !liveDocs.get(i))) { d = reader.document(i); cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length); featureCache.put(i, new SearchItemForEvaluation(cachedInstance.getByteArrayRepresentation(), new SimpleResultForEvaluation(-1d, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); } } } catch (IOException e) { e.printStackTrace(); } } }
private void testSearchSpeed(Class<? extends GlobalFeature> featureClass) throws IOException { ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath, testExtensive, true); parallelIndexer.addExtractor(featureClass); parallelIndexer.run(); IndexReader reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE)); Bits liveDocs = MultiFields.getLiveDocs(reader); double queryCount = 0d; ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass); long ms = System.currentTimeMillis(); String fileName; Document queryDoc; ImageSearchHits hits; for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { queryCount += 1d; // ok, we've got a query here for a document ... queryDoc = reader.document(i); hits = searcher.search(queryDoc, reader); } } ms = System.currentTimeMillis() - ms; System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1), (double) ms / queryCount); }
protected void init() { // put all respective features into an in-memory cache ... if (isCaching && reader != null) { Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); featureCache = new LinkedHashMap<Integer, SearchItemForEvaluation>(docs); try { Document d; for (int i = 0; i < docs; i++) { if (!(reader.hasDeletions() && !liveDocs.get(i))) { d = reader.document(i); cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length); featureCache.put(i, new SearchItemForEvaluation(cachedInstance.getByteArrayRepresentation(), new SimpleResultForEvaluation(-1d, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); } } } catch (IOException e) { e.printStackTrace(); } } }
public void testSearchPlain() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName))); GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, false, reader); // run search StopWatch sm = new StopWatch(); BufferedImage qImage = ImageIO.read(new FileInputStream(queryImage)); ImageSearchHits hits = null; for (int i = 0; i< numRuns; i++) { sm.start(); hits = is.search(qImage, reader); sm.stop(); } // print results for (int i = 0; i < hits.length(); i++) { String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns); } }