/**
 * Returns the first value stored under {@code SolrFields.WAYBACK_DATE},
 * cast to a String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the wayback date field
 */
public String getWaybackDate() {
    final Object first = getField(SolrFields.WAYBACK_DATE).getFirstValue();
    return (String) first;
}
/**
 * Returns the first value stored under {@code SolrFields.HASH}, cast to a
 * String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the hash field
 */
public String getHash() {
    final Object first = getField(SolrFields.HASH).getFirstValue();
    return (String) first;
}
/**
 * Returns the first value stored under {@code SolrFields.HASH}, cast to a
 * String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the hash field
 */
public String getHash() {
    final Object first = getField(SolrFields.HASH).getFirstValue();
    return (String) first;
}
}
/**
 * Returns the first value stored under {@code SolrFields.SOLR_HOST}, cast
 * to a String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the host field
 */
public String getHost() {
    final Object first = getField(SolrFields.SOLR_HOST).getFirstValue();
    return (String) first;
}
/**
 * Returns the first value stored under {@code SolrFields.SOLR_URL}, cast
 * to a String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the URL field
 */
public String getUrl() {
    final Object first = getField(SolrFields.SOLR_URL).getFirstValue();
    return (String) first;
}
/**
 * Returns the first value stored under {@code SolrFields.WAYBACK_DATE},
 * cast to a String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the wayback date field
 */
public String getWaybackDate() {
    final Object first = getField(SolrFields.WAYBACK_DATE).getFirstValue();
    return (String) first;
}
/**
 * Returns the first value stored under {@code SolrFields.SOLR_URL}, cast
 * to a String.
 *
 * NOTE(review): the result of {@code getField} is dereferenced without a
 * null check - confirm the field is always present when this is called.
 *
 * @return the first value of the URL field
 */
public String getUrl() {
    final Object first = getField(SolrFields.SOLR_URL).getFirstValue();
    return (String) first;
}
/** * Get a string containing the format as determined by three different * techniques: * * @return */ public String getFormatResults() { StringBuilder sb = new StringBuilder(); // As Served: SolrInputField served = getField(SolrFields.CONTENT_TYPE_SERVED); if (served != null) { sb.append((String) served.getFirstValue()); } // Tika: sb.append("\t"); SolrInputField tika = getField(SolrFields.CONTENT_TYPE_TIKA); if (tika != null) { sb.append((String) tika.getFirstValue()); } // DROID: sb.append("\t"); SolrInputField droid = getField(SolrFields.CONTENT_TYPE_DROID); if (droid != null) { sb.append((String) droid.getFirstValue()); } return sb.toString(); }
/** * Run all configured analysers on the text. * * @param text * @param solr */ public void analyse( SolrRecord solr ) { final long start = System.nanoTime(); // Pull out the text: if( solr.getField( SolrFields.SOLR_EXTRACTED_TEXT ) != null ) { String text = ( String ) solr.getField( SolrFields.SOLR_EXTRACTED_TEXT ).getFirstValue(); text = text.trim(); if( !"".equals( text ) ) { for( AbstractTextAnalyser ta : analysers ) { ta.analyse(text, solr); } } } Instrument.timeRel("WARCIndexer.extract#total", "TextAnalyzers#total", start); }
/** * Get the host->host links: */ public List<String> getHostLinks() { SolrInputField links = getField(SolrFields.SOLR_LINKS_HOSTS); if (links == null || links.getValueCount() == 0) return null; // Otherwise, build a list: List<String> hl = new ArrayList<String>(); for (Object v : links.getValues()) { hl.add(getHost() + "\t" + (String) v); } return hl; } }
/** * Run all configured analysers on the text. * * @param text * @param solr */ public void analyse( SolrRecord solr ) { final long start = System.nanoTime(); // Pull out the text: if( solr.getField( SolrFields.SOLR_EXTRACTED_TEXT ) != null ) { String text = ( String ) solr.getField( SolrFields.SOLR_EXTRACTED_TEXT ).getFirstValue(); text = text.trim(); if( !"".equals( text ) ) { for( AbstractTextAnalyser ta : analysers ) { if (ta.isEnabled()) { ta.analyse(text, solr); } } } } Instrument.timeRel("WARCIndexer.extract#total", "TextAnalyzers#total", start); }
/** * Get the list of faces and the item identifier: */ public List<String> getFaces() { SolrInputField faces = getField(SolrFields.IMAGE_FACES); if (faces == null || faces.getValueCount() == 0) return null; // Otherwise, list 'em: List<String> hl = new ArrayList<String>(); this.gatherMatches(faces.getValues(), "cat", hl); this.gatherMatches(faces.getValues(), "human", hl); return hl; }
&& solr.getField(SolrFields.CONTENT_TYPE_SERVED) == null) { String servedType = h.getValue(); if (servedType.length() > 200)
&& solr.getField(SolrFields.CONTENT_TYPE_SERVED) == null) { String servedType = h.getValue(); if (servedType.length() > 200)
.getField(SolrFields.SOLR_CONTENT_TYPE).getValue(); if (provider.shouldProcess(mimeType)) { try {
String mime = ( String ) solr.getField( SolrFields.SOLR_CONTENT_TYPE ).getValue(); if( mime.startsWith( "text" ) || mime.startsWith("application/xhtml+xml") ) { html.analyse(header, tikainput, solr);
} else { log.info("DUMMY_RUN: Skipping addition of doc: " + solr.getField("id").getFirstValue());