/**
 * Creates a DataImporter bound to the given core and request handler.
 *
 * @param core        the Solr core this importer operates on
 * @param handlerName name of the request handler that owns this importer
 */
DataImporter(SolrCore core, String handlerName) {
    this.core = core;
    this.handlerName = handlerName;
    // Snapshot the schema once at construction time.
    this.schema = core.getLatestSchema();
}
/**
 * Sorts the documents of the given model and materializes at most {@code rows}
 * of them as plain field maps.
 *
 * @param fullModel  all documents, keyed by their (comparable) id
 * @param sort       comparator used to order the documents
 * @param rows       maximum number of documents to return
 * @param fieldNames fields to copy into each result map
 * @return a list (as {@code Object}) of at most {@code rows} field maps, in sort order
 */
public static Object createDocObjects(Map<Comparable, Doc> fullModel, Comparator sort, int rows, Collection<String> fieldNames) {
    List<Doc> docList = new ArrayList<>(fullModel.values());
    Collections.sort(docList, sort);
    // Hoist the schema lookup out of the loop; the original also called
    // toObject(...) twice per document, assigning the first result to an
    // unused local — that duplicate conversion is removed here.
    IndexSchema schema = h.getCore().getLatestSchema();
    List<Map<String, Object>> sortedDocs = new ArrayList<>(Math.min(rows, docList.size()));
    for (Doc doc : docList) {
        if (sortedDocs.size() >= rows) break;
        sortedDocs.add(toObject(doc, schema, fieldNames));
    }
    return sortedDocs;
}
/** * analyzes string like the given field * @param field the name of the field * @param value the string to analyze * @return the analyzed string */ public static String analyzeString(SolrCore core, String field, String value) { try { StringBuilder b = new StringBuilder(); try (TokenStream ts = core.getLatestSchema().getFieldType(field).getQueryAnalyzer().tokenStream(field, new StringReader(value))) { ts.reset(); while (ts.incrementToken()) { b.append(" "); CharTermAttribute attr = ts.getAttribute(CharTermAttribute.class); b.append(attr); } } return b.toString().trim(); } catch (IOException e) { //FIXME: This error should be properly logged! e.printStackTrace(); return value; } }
// NOTE(review): fragment — the TokenizerChain branch opened here closes outside this view.
// Initializes an empty stop-word list for the field, then looks up the field's
// index-time analyzer from the latest schema to inspect its filter chain.
solrStopWords.put(fieldName, new ArrayList<>()); IndexSchema schema = core.getLatestSchema(); final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer(); if (fieldAnalyzer instanceof TokenizerChain) {
private void addSchemaField(FieldMapper mapper, FieldEntry entry) { if (!core.getLatestSchema().isMutable()) { final String message = "This IndexSchema is not mutable."; throw new SolrException(BAD_REQUEST, message); final IndexSchema oldSchema = core.getLatestSchema(); if (oldSchema.getFieldTypeNoEx(mapper.getTargetFieldname(entry)) != null) { return; // the field already exists in the schema SchemaField field = mapper.getSchemaField(core.getLatestSchema(), entry); IndexSchema newSchema = oldSchema.addField(field); if (newSchema != null) {
// NOTE(review): fragment — the surrounding method is not visible here.
// Snapshot the latest schema, its explicitly declared fields, and prepare a set
// (presumably to collect field names; confirm usage further down in the method).
IndexSchema schema = core.getLatestSchema(); Map<String, SchemaField> solrFields = schema.getFields(); Set<String> schemeFieldnames = new HashSet<String>();
// NOTE(review): fragment — the branch opened here closes outside this view.
// Reads the lookup implementation and the suggestion-analyzer field-type name from the
// suggester config, then checks that the schema actually declares that field type.
lookupImpl = (String)config.get(LOOKUP_IMPL); IndexSchema schema = core.getLatestSchema(); suggestionAnalyzerFieldTypeName = (String)config.get(SUGGESTION_ANALYZER_FIELDTYPE); if (schema.getFieldTypes().containsKey(suggestionAnalyzerFieldTypeName)) {
// NOTE(review): fragment — surrounding method not visible.
// Resolves the field type named by the "queryAnalyzerFieldType" init parameter;
// getFieldTypes().get(...) yields null if the type is not declared — presumably
// handled by code below this view.
IndexSchema schema = core.getLatestSchema(); String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType"); FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
/**
 * SolrCoreAware callback: registers this component's XML fields with the core's
 * latest schema, validates that the schema's unique key matches the configured
 * URI field, and refreshes the schema's analyzers.
 *
 * Order is significant (per the inline comments in the body):
 *  - XML_STORE is informed explicitly because the indexer does not list it;
 *  - registerXmlTextFields() must run before informField() registers default
 *    analyzers with the schema;
 *  - refreshAnalyzers() must be called last, after all field-map changes.
 * A missing or mismatched unique-key field is only logged as an error, not fatal.
 */
public void inform(SolrCore core) { schema = core.getLatestSchema(); // XML_STORE is not listed explicitly by the indexer informField (indexConfig.getField(FieldRole.XML_STORE), core); // This must be run before informField() registers default analyzers with the Schema registerXmlTextFields(); for (FieldDefinition xmlField : indexConfig.getFields()) { informField (xmlField, core); } if (xpathFieldConfig != null) { addXPathFields(); } SchemaField uniqueKeyField = schema.getUniqueKeyField(); if (uniqueKeyField == null) { logger.error("{} schema does not define any unique field", core.getName()); } else if (! uniqueKeyField.getName().equals(indexConfig.getFieldName(FieldRole.URI))) { logger.error("{} schema defines a different unique field than the uri field declared in lux configuration", core.getName()); } // must call this after making changes to the field map: schema.refreshAnalyzers(); }
// NOTE(review): fragment — the body of processAdd continues beyond this view.
// Update-processor entry point: pulls the core from the add command's request and
// snapshots its latest schema for the processing below.
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { SolrCore core = cmd.getReq().getCore(); IndexSchema schema = core.getLatestSchema();
/** * Test that the ContentHandler properly strips the illegal characters */ @Test public void testTransformValue() { String fieldName = "user_name"; assertFalse("foobar".equals(getFoobarWithNonChars())); Metadata metadata = new Metadata(); // load illegal char string into a metadata field and generate a new document, // which will cause the ContentHandler to be invoked. metadata.set(fieldName, getFoobarWithNonChars()); StripNonCharSolrContentHandlerFactory contentHandlerFactory = new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS); IndexSchema schema = h.getCore().getLatestSchema(); SolrContentHandler contentHandler = contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema); SolrInputDocument doc = contentHandler.newDocument(); String foobar = doc.getFieldValue(fieldName).toString(); assertTrue("foobar".equals(foobar)); }
/** Verifies that the JSON field is declared with the {@code ExtendedJsonField} type. */
@Test
public void testSirenFieldType() throws Exception {
    final IndexSchema schema = h.getCore().getLatestSchema();
    final SchemaField field = schema.getField(JSON_FIELD);
    assertNotNull(field);
    final FieldType type = field.getType();
    assertTrue(type instanceof ExtendedJsonField);
}
/** Verifies that both concise fields are declared with the {@code ConciseJsonField} type. */
@Test
public void testConciseSirenFieldType() throws Exception {
    final IndexSchema schema = h.getCore().getLatestSchema();
    // Same checks for both concise field declarations.
    for (final String name : new String[] { "concise", "concise-attribute-wildcard" }) {
        final SchemaField field = schema.getField(name);
        assertNotNull(field);
        final FieldType type = field.getType();
        assertTrue(type instanceof ConciseJsonField);
    }
}
// NOTE(review): fragment — the enclosing loop/method and the transformer branch are
// not fully visible here.
// Resolves the unique-key field and its type, then (inside a loop over doc ids)
// skips unmatched ids, loads the stored Lucene document, converts it to a
// SolrDocument against the latest schema, and applies the optional transformer.
SchemaField idField = core.getLatestSchema().getUniqueKeyField(); FieldType fieldType = idField.getType(); boolean openedRealTimeSearcher = false; if (docid < 0) continue; Document luceneDocument = searcher.doc(docid); SolrDocument newDoc = toSolrDoc(luceneDocument, core.getLatestSchema()); if( transformer != null ) { transformer.transform(newDoc, docid);
/** Checks multiValued/tokenized/postings-format properties of the "json" field type. */
@Test
public void testSirenFieldTypeProperties() throws Exception {
    final FieldType fieldType = h.getCore().getLatestSchema().getField("json").getType();
    assertTrue(fieldType instanceof ExtendedJsonField);
    assertFalse(fieldType.isMultiValued());
    assertTrue(fieldType.isTokenized());
    assertEquals(fieldType.getPostingsFormat(), Siren10AForPostingsFormat.NAME);
}
/** Inspects the per-datatype analyzers registered by the DatatypeAnalyzerFilterFactory. */
@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
    final IndexSchema schema = h.getCore().getLatestSchema();
    final FieldType fieldType = schema.getField(JSON_FIELD).getType();
    TokenizerChain chain = (TokenizerChain) fieldType.getAnalyzer();
    assertTrue(chain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    final DatatypeAnalyzerFilterFactory factory =
        (DatatypeAnalyzerFilterFactory) chain.getTokenFilterFactories()[0];
    assertNotNull(factory.getDatatypeAnalyzers());
    assertEquals(9, factory.getDatatypeAnalyzers().size());

    // json:field datatype -> whitespace tokenizer
    assertNotNull(factory.getDatatypeAnalyzers().get("http://json.org/field"));
    chain = (TokenizerChain) factory.getDatatypeAnalyzers().get("http://json.org/field");
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

    // xsd:string datatype -> UAX29 URL/email tokenizer
    assertNotNull(factory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"));
    chain = (TokenizerChain) factory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string");
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

    // xsd:int datatype -> numeric analyzer, precision step 8 over 32-bit ints
    assertNotNull(factory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"));
    assertTrue(factory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer);
    final IntNumericAnalyzer analyzer =
        (IntNumericAnalyzer) factory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int");
    assertEquals(8, analyzer.getPrecisionStep());
    assertEquals(32, analyzer.getNumericParser().getValueSize());
    assertEquals(NumericType.INT, analyzer.getNumericParser().getNumericType());
}
// NOTE(review): fragment — this test is truncated (no closing brace visible) and uses
// a variable 'd' whose declaration is outside this view; verify against the full file.
// Checks that indexing a JSON value annotated with _datatype_ xsd:double causes the
// processor to register a "double.json.rating" field of type "tdouble", and that the
// mapping is still present after a subsequent add.
public void testCustomDatatypeField() throws Exception { String json = "{\"rating\": {\"_datatype_\": \"http://www.w3.org/2001/XMLSchema#double\", \"_value_\":\"5.4\"}}"; IndexSchema schema = h.getCore().getLatestSchema(); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("double.json.rating")); assertEquals("tdouble", schema.getFieldType("double.json.rating").getTypeName()); schema = h.getCore().getLatestSchema(); d = processAdd("generate-facets-processor", doc(f("id", "2"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("double.json.rating")); assertEquals("tdouble", schema.getFieldType("double.json.rating").getTypeName());
// NOTE(review): fragment — this test is truncated (no closing brace visible) and the
// final assertion references a variable 'd' declared outside this view.
// Repeatedly re-fetches the latest schema (it is replaced on each mutation) and checks
// that the processor generated "string.json.knows.name" (type "string"), "long.json.age"
// (type "tlong"), and "double.json.length" (type "tdouble"), while no field was generated
// for "string.json.description".
public void testStringField() throws Exception { String json = "{\"knows\": [{\"name\":\"josef\"}, {\"name\":\"szymon\"}]}"; IndexSchema schema = h.getCore().getLatestSchema(); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("string.json.knows.name")); assertEquals("string", schema.getFieldType("string.json.knows.name").getTypeName()); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("string.json.knows.name")); assertEquals("string", schema.getFieldType("string.json.knows.name").getTypeName()); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("string.json.knows.name")); assertEquals("string", schema.getFieldType("string.json.knows.name").getTypeName()); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("long.json.age")); assertEquals("tlong", schema.getFieldType("long.json.age").getTypeName()); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("double.json.length")); assertEquals("tdouble", schema.getFieldType("double.json.length").getTypeName()); schema = h.getCore().getLatestSchema(); assertNull(schema.getFieldOrNull("string.json.description")); assertNull(d.getFieldValue("string.json.description"));
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); }
@Test public void testConciseSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField json = schema.getField("concise"); final FieldType tmp = json.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory); // 4 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(4, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory); }