@Override public void run() { try { input.reset(); parser.parse( input, null, metadata, null ); } catch( Exception e ) { log.error( parser.getClass().getName()+".parse(): " + e.getMessage() ); // Also record as a Solr PARSE_ERROR solr.addParseException("when parsing with " + parser.getClass().getName(), e); } } }
@Override public void run() { try { input.reset(); parser.parse( input, null, metadata, null ); } catch( Exception e ) { log.error(parser.getClass().getName() + ".parse(): " + e.getMessage(), e); // Also record as a Solr PARSE_ERROR solr.addParseException("when parsing with " + parser.getClass().getName(), e); } } }
/**
 * Runs language detection on {@code text} and, when the detector returns a
 * result, adds its language to {@code solr} under
 * {@code SolrFields.CONTENT_LANGUAGE}.
 * <p>
 * An {@link IllegalArgumentException} from detection is logged and recorded
 * via {@code solr.addParseException(...)}; it is not rethrown. The elapsed
 * time since entry is reported to {@code Instrument.timeRel(...)} afterwards.
 *
 * @param text the text handed to the language detector
 * @param solr the record that receives the language field or the parse exception
 */
@Override
public void analyse(String text, SolrRecord solr) {
    final long startNanos = System.nanoTime();
    try {
        final LanguageResult detected = ld.detect(text);
        if (detected != null) {
            solr.addField(SolrFields.CONTENT_LANGUAGE, detected.getLanguage());
        }
    } catch (IllegalArgumentException ex) {
        final String reason = ex.getMessage();
        log.error("Exception when determining language of this item: " + reason, ex);
        solr.addParseException(ex);
    }
    Instrument.timeRel("TextAnalyzers#total", "LanguageAnalyzer#total", startNanos);
}
/**
 * Runs language detection on {@code text} and, when the detector returns a
 * result, adds its language to {@code solr} under
 * {@code SolrFields.CONTENT_LANGUAGE}.
 * <p>
 * Does nothing at all (no detection, no timing) when {@code enabled} is false.
 * An {@link IllegalArgumentException} from detection is logged and recorded
 * via {@code solr.addParseException(...)}; it is not rethrown. The elapsed
 * time since the start of detection is reported to
 * {@code Instrument.timeRel(...)} afterwards.
 *
 * @param text the text handed to the language detector
 * @param solr the record that receives the language field or the parse exception
 */
@Override
public void analyse(String text, SolrRecord solr) {
    // Analyser switched off: skip detection and instrumentation entirely.
    if (!enabled) {
        return;
    }

    final long startNanos = System.nanoTime();
    try {
        final LanguageResult detected = ld.detect(text);
        if (detected != null) {
            solr.addField(SolrFields.CONTENT_LANGUAGE, detected.getLanguage());
        }
    } catch (IllegalArgumentException ex) {
        final String reason = ex.getMessage();
        log.error("Exception when determining language of this item: " + reason, ex);
        solr.addParseException(ex);
    }
    Instrument.timeRel("TextAnalyzers#total", "LanguageAnalyzer#total", startNanos);
}
solr.addParseException("when parsing statusCode", e); } catch( Exception e ) { log.error( "Exception when parsing headers: " + e ); solr.addParseException("when parsing headers", e);
solr.addParseException("when parsing statusCode", e); } catch( Exception e ) { log.error( "Exception when parsing headers: " + e ); solr.addParseException("when parsing headers", e);
@Override public void analyse(String source, ArchiveRecordHeader header, InputStream tikainput, SolrRecord solr) { final long start = System.nanoTime(); Metadata metadata = new Metadata(); // Also attempt to grab the XML Root NS: if( this.extractXMLRootNamespace ) { ParseRunner parser = new ParseRunner( xrns, tikainput, metadata, solr ); try { TimeLimiter.run(parser, 30000L, false); } catch( Exception e ) { log.error( "WritableSolrRecord.extract(): " + e.getMessage() ); solr.addParseException("when parsing for XML Root Namespace", e); } solr.addField( SolrFields.XML_ROOT_NS, metadata.get(XMLRootNamespaceParser.XML_ROOT_NS)); } Instrument.timeRel("WARCPayloadAnalyzers.analyze#total","XMLAnalyzer.analyze", start); }
@Override public void analyse(ArchiveRecordHeader header, InputStream tikainput, SolrRecord solr) { final long start = System.nanoTime(); Metadata metadata = new Metadata(); // Also attempt to grab the XML Root NS: if( this.extractXMLRootNamespace ) { ParseRunner parser = new ParseRunner( xrns, tikainput, metadata, solr ); try { TimeLimiter.run(parser, 30000L, false); } catch( Exception e ) { log.error( "WritableSolrRecord.extract(): " + e.getMessage() ); solr.addParseException("when parsing for XML Root Namespace", e); } solr.addField( SolrFields.XML_ROOT_NS, metadata.get(XMLRootNamespaceParser.XML_ROOT_NS)); } Instrument.timeRel("WARCPayloadAnalyzers.analyze#total","XMLAnalyzer.analyze", start); }
} catch (Exception e) { log.error("WritableSolrRecord.extract(): " + e.getMessage()); solr.addParseException("when parsing with Apache Preflight", e);
} catch (Exception e) { log.warn("Exception on record " + url + " from " + inFile.getName(), e); doc.addParseException(e); continue; } catch (OutOfMemoryError e) { log.warn("OutOfMemoryError on record " + url + " from " + inFile.getName(), e); doc.addParseException(e);
} catch (Exception e) { log.error("WritableSolrRecord.extract(): " + e.getMessage()); solr.addParseException("when parsing with Apache Preflight", e);
} catch (Exception e) { log.warn("Exception on record " + url + " from " + inFile.getName(), e); doc.addParseException(e); continue; } catch (OutOfMemoryError e) { log.warn("OutOfMemoryError on record " + url + " from " + inFile.getName(), e); doc.addParseException(e);
LOG.error(e.getClass().getName() + ": " + e.getMessage() + "; " + header.getUrl() + "; " + header.getOffset()); reporter.incrCounter(MyCounters.NUM_ERRORS, 1); solr.addParseException(e); } catch (OutOfMemoryError e) { LOG.error("OOME " + e.getClass().getName() + ": " + e.getMessage() + "; " + header.getUrl() + "; " + header.getOffset()); reporter.incrCounter(MyCounters.NUM_ERRORS, 1); solr.addParseException(e);
solr.addParseException(e); solr.addParseException(e);
solr.addParseException(e); solr.addParseException(e);
} catch( Exception e ) { log.error( "WritableSolrRecord.extract(): " + e.getMessage() ); solr.addParseException("when parsing as HTML", e);
} catch( Exception e ) { log.error( "WritableSolrRecord.extract(): " + e.getMessage() ); solr.addParseException("when parsing as HTML", e);
.get(TikaPayloadAnalyser.TIKA_PARSE_EXCEPTION); if (tikaException != null) { solr.addParseException(tikaException, new RuntimeException("Exception from Tika"));
} catch (Exception e) { log.error("WritableSolrRecord.extract(): " + e.getMessage()); solr.addParseException("when scanning for faces", e);
} catch (Exception e) { log.error("WritableSolrRecord.extract(): " + e.getMessage()); solr.addParseException("when scanning for faces", e);