@Override
public void run() {
    try {
        QueueItem data = queue.take();
        // A QueueItem with a null id serves as the poison pill that ends the loop.
        while (data.id != null) {
            document.clear();
            document.put(DocumentBuilder.FIELD_NAME_IDENTIFIER, data.id);
            document.put("title", data.id);
            for (Iterator<GlobalFeature> iterator = data.features.iterator(); iterator.hasNext(); ) {
                GlobalFeature f = iterator.next();
                // store the raw feature ...
                document.put(f.getFieldName(), f.getByteArrayRepresentation());
                // ... and optionally a hash representation for approximate search.
                if (doHashingBitSampling) {
                    document.put(f.getFieldName() + DocumentBuilder.HASH_FIELD_SUFFIX,
                            SerializationUtils.arrayToString(BitSampling.generateHashes(f.getFeatureVector())));
                } else if (doMetricSpaceIndexing) {
                    if (MetricSpaces.supportsFeature(f)) {
                        document.put(f.getFieldName() + DocumentBuilder.HASH_FIELD_SUFFIX,
                                MetricSpaces.generateHashString(f));
                    }
                }
            }
            output(document);
            data = queue.take();
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
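The loop above terminates when it takes an item whose id is null. A minimal sketch of the matching producer side, assuming a shared LinkedBlockingQueue and a QueueItem(id, features) constructor; imageIds, extractFeatures and numberOfConsumers are hypothetical names, not taken from the code above:

// Producer sketch (hypothetical names, see note above).
LinkedBlockingQueue<QueueItem> queue = new LinkedBlockingQueue<>(100);
for (String id : imageIds) {
    // extractFeatures(id) stands in for whatever produces the List<GlobalFeature> per image.
    queue.put(new QueueItem(id, extractFeatures(id))); // blocks while the queue is full
}
// One poison pill per consumer thread: an item with a null id ends each consumer's while loop.
for (int i = 0; i < numberOfConsumers; i++) {
    queue.put(new QueueItem(null, null));
}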
document.add(new TextField(featureFieldName + "_hash", SerializationUtils.arrayToString(hashes), Field.Store.YES));
if (hashingMode == HashingMode.BitSampling) {
    hashes = BitSampling.generateHashes(globalFeature.getFeatureVector());
    hash = new TextField(extractorItems.get(extractorItem)[1], SerializationUtils.arrayToString(hashes), Field.Store.YES);
} else if (hashingMode == HashingMode.LSH) {
    hashes = LocalitySensitiveHashing.generateHashes(globalFeature.getFeatureVector());
    hash = new TextField(extractorItems.get(extractorItem)[1], SerializationUtils.arrayToString(hashes), Field.Store.YES);
} else if (hashingMode == HashingMode.MetricSpaces) {
    if (MetricSpaces.supportsFeature(globalFeature)) {
        // The original snippet breaks off here; following the pattern of the branches above and of
        // the first snippet, supported features are stored as a MetricSpaces hash string.
        hash = new TextField(extractorItems.get(extractorItem)[1], MetricSpaces.generateHashString(globalFeature), Field.Store.YES);
    }
}
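Both the BitSampling and LSH branches assume the hash functions have already been initialized; calling generateHashes(...) without that setup typically fails. A setup sketch, with the exact loader calls treated as an assumption about the LIRE version in use:

// Load the bit sampling hash functions once, before any indexing thread starts
// (assumed call; if no hash function file exists yet, BitSampling.generateHashFunctions() can create one).
BitSampling.readHashFunctions();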
/**
 * Overwrite this method if you want to filter the input, apply hashing, etc.
 *
 * @param feature          the current feature.
 * @param document         the current document.
 * @param featureFieldName the field name of the feature.
 */
protected void addToDocument(LireFeature feature, Document document, String featureFieldName) {
    if (run == 0) {
        // just count documents
    } else if (run == 1) { // Select the representatives ...
        if (representativesID.contains(docCount)
                && feature.getClass().getCanonicalName().equals(featureClass.getCanonicalName())) {
            // it's a representative, put it into a temporary data structure ...
            representatives.add(feature);
        }
    } else if (run == 2) {
        // actual hashing: find the nearest representatives and put those as a hash into the document.
        if (feature.getClass().getCanonicalName().equals(featureClass.getCanonicalName())) {
            // it's a feature to be hashed
            document.add(new TextField(featureFieldName + "_hash",
                    SerializationUtils.arrayToString(getHashes(feature)), Field.Store.YES));
        }
        document.add(new StoredField(featureFieldName, feature.getByteArrayRepresentation()));
    }
}
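The run counter implements a three-pass scheme: pass 0 only counts documents, pass 1 samples representative features, and pass 2 writes the nearest-representative hashes next to the stored feature bytes. As the Javadoc suggests, the method can be overridden, for instance to filter which features are hashed; a sketch under the assumption that the enclosing indexer class is subclassed, with an arbitrary length threshold:

// Hypothetical subclass method: skip features whose vectors are too long to hash sensibly.
@Override
protected void addToDocument(LireFeature feature, Document document, String featureFieldName) {
    if (feature.getDoubleHistogram().length > 640) {
        return; // neither hash nor stored field is added for this feature
    }
    super.addToDocument(feature, document, featureFieldName);
}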
    hashes = LocalitySensitiveHashing.generateHashes(lireFeature.getDoubleHistogram());
    result[1] = new TextField(fieldName + HASH_FIELD_SUFFIX, SerializationUtils.arrayToString(hashes), Field.Store.YES);
} else
    System.err.println("Could not create hashes, feature vector too long: "
            + lireFeature.getDoubleHistogram().length + " (" + lireFeature.getClass().getName() + ")");
/**
 * Takes one single document, creates the visual words and adds them to the document. The same document is returned.
 *
 * @param d the document to use for adding the visual words
 * @return the same document with the visual word fields added
 * @throws IOException
 */
public Document getVisualWords(Document d) throws IOException {
    clusters = Cluster.readClusters(clusterFile);
    int[] tmpHist = new int[clusters.length];
    LireFeature f = getFeatureInstance();
    IndexableField[] fields = d.getFields(localFeatureFieldName);
    // find the appropriate cluster for each feature:
    for (int j = 0; j < fields.length; j++) {
        f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset, fields[j].binaryValue().length);
        tmpHist[clusterForFeature((Histogram) f)]++;
    }
    normalize(tmpHist);
    d.add(new TextField(visualWordsFieldName, arrayToVisualWordString(tmpHist), Field.Store.YES));
    d.add(new StringField(localFeatureHistFieldName, SerializationUtils.arrayToString(tmpHist), Field.Store.YES));
    d.removeFields(localFeatureFieldName);
    return d;
}
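A rough usage sketch for re-indexing an existing collection, assuming getVisualWords(...) belongs to a builder instance here called bovwBuilder and that documents carry the identifier field used in the first snippet; the index path and analyzer are placeholders:

// Sketch: rewrite every indexed document with its visual word representation.
Directory dir = FSDirectory.open(Paths.get("index"));
IndexReader reader = DirectoryReader.open(dir);
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
for (int i = 0; i < reader.maxDoc(); i++) {
    Document d = reader.document(i);
    // Replace the stored local features with the visual word fields and write the document back.
    d = bovwBuilder.getVisualWords(d);
    writer.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
            d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
}
writer.commit();
writer.close();
reader.close();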
context.externalValue(SerializationUtils.arrayToString(hashVals));
hashMapper.parse(context);