@Override public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException { FileAnalyzer fa; StreamSource bzSrc = wrap(src); String path = doc.get("path"); if (path != null && (path.endsWith(".bz2") || path.endsWith(".BZ2") || path.endsWith(".bz"))) { String newname = path.substring(0, path.lastIndexOf('.')); //System.err.println("BZIPPED OF = " + newname); try (InputStream in = bzSrc.getStream()) { fa = AnalyzerGuru.getAnalyzer(in, newname); } if (!(fa instanceof BZip2Analyzer)) { if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) { this.g = Genre.XREFABLE; } else { this.g = Genre.DATA; } fa.analyze(doc, bzSrc, xrefOut); if (doc.get("t") != null) { doc.removeField("t"); if (g == Genre.XREFABLE) { doc.add(new Field("t", g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms)); } } } } }
// Read the fields of d stored under localFeatureFieldName, then remove the fields
// named visualWordsFieldName and localFeatureHistFieldName from d.
// NOTE(review): presumably the removed fields are rebuilt from `fields` afterwards -- confirm in the enclosing method.
IndexableField[] fields = d.getFields(localFeatureFieldName); d.removeField(visualWordsFieldName); d.removeField(localFeatureHistFieldName);
// Read the fields of d stored under localFeatureFieldName, then remove the field
// named vladFieldName from d.
// NOTE(review): presumably the removed field is rebuilt from `fields` afterwards -- confirm in the enclosing method.
IndexableField[] fields = d.getFields(localFeatureFieldName); d.removeField(vladFieldName);
// Remove the existing "t" field; a replacement carrying g.typeName() is added
// only when g is Genre.XREFABLE (the closing brace of this `if` lies outside this excerpt).
doc.removeField("t"); if (g == Genre.XREFABLE) { doc.add(new Field("t", g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
public void updateLegacyDocument( ArtifactInfo ai, Document doc ) { updateDocument( ai, doc ); // legacy! if ( ai.prefix != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_PREFIX, ai.prefix, Field.Store.YES, Field.Index.UN_TOKENIZED ) ); } if ( ai.goals != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str( ai.goals ), Field.Store.YES, Field.Index.NO ) ); } doc.removeField( ArtifactInfo.GROUP_ID ); doc.add( new Field( ArtifactInfo.GROUP_ID, ai.groupId, Field.Store.NO, Field.Index.UN_TOKENIZED ) ); }
public void updateLegacyDocument( ArtifactInfo ai, Document doc ) { updateDocument( ai, doc ); // legacy! if ( ai.getPrefix() != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), Field.Store.YES, Field.Index.NOT_ANALYZED ) ); } if ( ai.getGoals() != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str( ai.getGoals() ), Field.Store.YES, Field.Index.NO ) ); } doc.removeField( ArtifactInfo.GROUP_ID ); doc.add( new Field( ArtifactInfo.GROUP_ID, ai.getGroupId(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); }
/**
 * Builds an {@code XdmNode} from a retrieved document and attaches the
 * document to the node's document root as user data.
 *
 * <p>The string value and the binary value of the {@code xmlFieldName} field
 * are both read (either may be {@code null}) and passed to
 * {@code createXdmNode} together with the {@code uriFieldName} value. The
 * {@code xmlFieldName} field is then removed from the document before the
 * document is stored on the node under the key
 * {@code Document.class.getName()}.
 *
 * @param docID    identifier forwarded to {@code createXdmNode}
 * @param document retrieved document; modified in place (xml field removed)
 * @return the node created for this document
 * @throws IOException declared for callers; propagated from node creation
 */
private XdmNode getXdmNode(long docID, Document document) throws IOException {
    String xml = document.get(xmlFieldName);
    String uri = document.get(uriFieldName);
    BytesRef binaryValue = document.getBinaryValue(xmlFieldName);

    byte[] bytes = null;
    if (binaryValue != null) {
        if (binaryValue.offset == 0 && binaryValue.length == binaryValue.bytes.length) {
            // The ref spans the whole backing array, so it can be used as is.
            bytes = binaryValue.bytes;
        } else {
            // A BytesRef is only valid for [offset, offset + length); copy that
            // slice so no leading or trailing bytes of a shared buffer are passed on.
            bytes = new byte[binaryValue.length];
            System.arraycopy(binaryValue.bytes, binaryValue.offset, bytes, 0, binaryValue.length);
        }
    }

    XdmNode node = createXdmNode(docID, uri, xml, bytes);
    document.removeField(xmlFieldName);
    node.getUnderlyingNode().getDocumentRoot().setUserData(Document.class.getName(), document);
    return node;
}
public void updateLegacyDocument( ArtifactInfo ai, Document doc ) { updateDocument( ai, doc ); // legacy! if ( ai.getPrefix() != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), Field.Store.YES, Field.Index.NOT_ANALYZED ) ); } if ( ai.getGoals() != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str( ai.getGoals() ), Field.Store.YES, Field.Index.NO ) ); } doc.removeField( ArtifactInfo.GROUP_ID ); doc.add( new Field( ArtifactInfo.GROUP_ID, ai.getGroupId(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); }
public void updateLegacyDocument( ArtifactInfo ai, Document doc ) { updateDocument( ai, doc ); // legacy! if ( ai.getPrefix() != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), Field.Store.YES, Field.Index.NOT_ANALYZED ) ); } if ( ai.getGoals() != null ) { doc.add( new Field( ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str( ai.getGoals() ), Field.Store.YES, Field.Index.NO ) ); } doc.removeField( ArtifactInfo.GROUP_ID ); doc.add( new Field( ArtifactInfo.GROUP_ID, ai.getGroupId(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); }
/**
 * Loads the stored fields of {@code docID} through the superclass and, when
 * the visitor is a {@code DocumentStoredFieldVisitor}, merges facet values
 * into the document it is building.
 *
 * <p>For every facet handler that returns a non-null value array for the
 * document, the values already present under the handler's name are unioned
 * with the handler's values, the existing field of that name is removed, and
 * one unstored {@code StringField} is added per distinct value.
 *
 * @param docID   document number to load
 * @param visitor visitor receiving the stored fields
 * @throws IOException propagated from the superclass load
 */
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
    super.document(docID, visitor);
    if (visitor instanceof DocumentStoredFieldVisitor) {
        Document doc = ((DocumentStoredFieldVisitor) visitor).getDocument();
        for (FacetHandler<?> handler : _facetHandlerMap.values()) {
            String[] facetVals = handler.getFieldValues(this, docID);
            if (facetVals == null) {
                continue;
            }

            // Union of what is already on the document and what the handler reports.
            String[] existing = doc.getValues(handler.getName());
            Set<String> merged = new HashSet<String>(Arrays.asList(existing));
            merged.addAll(Arrays.asList(facetVals));

            doc.removeField(handler.getName());
            for (String value : merged) {
                doc.add(new StringField(handler.getName(), value, Field.Store.NO));
            }
        }
    }
}
// Remove from doc the field whose name is given by fulltext[k].name().
doc.removeField(fulltext[k].name());
private void addDocumentFields (XmlIndexer indexer, IndexSchema indexSchema, Document doc) { if (indexConfig.isOption(IndexConfiguration.STORE_TINY_BINARY)) { // remove the serialized xml field value -- we will store a TinyBinary instead doc.removeField(indexConfig.getFieldName(FieldRole.XML_STORE)); } for (FieldDefinition field : indexConfig.getFields()) { String fieldName = field.getName(); if (field == indexConfig.getField(FieldRole.URI) || field == indexConfig.getField(FieldRole.XML_STORE)) { if (doc.getField(fieldName) != null) { // uri and xml are provided externally in LuxUpdateProcessor continue; } } Iterable<?> values = field.getValues(indexer); SchemaField schemaField = indexSchema.getField(fieldName); if (values != null) { for (Object value : values) { addField(doc, schemaField, value, 1.0f); } } else { for (IndexableField value : field.getFieldValues(indexer)) { addField(doc, schemaField, value, 1.0f); } } } }