/**
 * Extracts plain text from an HTML input stream and wraps it in an extraction result.<p>
 *
 * An empty <code>encoding</code> is replaced by the system default encoding. The text
 * obtained from the HTML extractor is then passed through <code>removeControlChars</code>.
 * Any exception raised while doing so is logged at error level and not rethrown; the
 * returned result then wraps whatever text had been obtained up to that point (the empty
 * String if none).<p>
 *
 * @param in the stream providing the HTML content
 * @param encoding the encoding to use, if empty the system default encoding is used
 *
 * @return the extraction result wrapping the extracted text
 *
 * @throws Exception declared by the implemented interface method
 *
 * @see org.opencms.search.extractors.I_CmsTextExtractor#extractText(java.io.InputStream, java.lang.String)
 */
@Override
public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

    String text = "";
    try {
        String effectiveEncoding = CmsStringUtil.isEmpty(encoding)
            ? OpenCms.getSystemInfo().getDefaultEncoding()
            : encoding;
        // two separate assignments on purpose: if the cleanup step fails,
        // the uncleaned extracted text is still what gets returned
        text = CmsHtmlExtractor.extractText(in, effectiveEncoding);
        text = removeControlChars(text);
    } catch (Exception extractionFailure) {
        // best effort: report the problem and fall through to the return below
        if (LOG.isErrorEnabled()) {
            LOG.error(Messages.get().container(Messages.LOG_EXTRACT_TEXT_ERROR_0), extractionFailure);
        }
    }
    return new CmsExtractionResult(text);
}
}
/**
 * Returns the raw text content of a given VFS resource containing HTML data.<p>
 *
 * The resource is read as a file first. Its content-encoding property is then looked up
 * (with the system default encoding supplied as the fallback value) and the file contents
 * are handed to the HTML extractor together with that encoding.<p>
 *
 * @param cms the CmsObject used to read the content-encoding property
 * @param resource the resource to extract the text from
 * @param index the search index (not used by this implementation)
 *
 * @return the extraction result produced by the HTML extractor
 *
 * @throws CmsIndexException if reading the property or extracting the text fails,
 *         wrapping the original exception and naming the resource root path
 * @throws CmsException declared for failures outside the guarded extraction section
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
throws CmsIndexException, CmsException {

    // reading the file happens outside the try block, so its exceptions are not wrapped
    CmsFile htmlFile = readFile(cms, resource);
    try {
        String contentEncoding = cms.readPropertyObject(
            resource,
            CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
            true).getValue(OpenCms.getSystemInfo().getDefaultEncoding());
        I_CmsExtractionResult extracted = CmsExtractorHtml.getExtractor().extractText(
            htmlFile.getContents(),
            contentEncoding);
        return extracted;
    } catch (Exception cause) {
        // every failure in the guarded section is reported as an index exception
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            cause);
    }
}
/**
 * Returns the raw text content of a given VFS resource containing HTML data.<p>
 *
 * The extraction is logged through <code>logContentExtraction</code> before the resource
 * is read as a file. Its content-encoding property is then looked up (with the system
 * default encoding supplied as the fallback value) and the file contents are handed to
 * the HTML extractor together with that encoding.<p>
 *
 * @param cms the CmsObject used to read the content-encoding property
 * @param resource the resource to extract the text from
 * @param index the search index, passed on to the extraction logging
 *
 * @return the extraction result produced by the HTML extractor
 *
 * @throws CmsIndexException if reading the property or extracting the text fails,
 *         wrapping the original exception and naming the resource root path
 * @throws CmsException declared for failures outside the guarded extraction section
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
throws CmsIndexException, CmsException {

    logContentExtraction(resource, index);
    // reading the file happens outside the try block, so its exceptions are not wrapped
    CmsFile htmlFile = readFile(cms, resource);
    try {
        String contentEncoding = cms.readPropertyObject(
            resource,
            CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
            true).getValue(OpenCms.getSystemInfo().getDefaultEncoding());
        I_CmsExtractionResult extracted = CmsExtractorHtml.getExtractor().extractText(
            htmlFile.getContents(),
            contentEncoding);
        return extracted;
    } catch (Exception cause) {
        // every failure in the guarded section is reported as an index exception
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            cause);
    }
}
/**
 * Extracts plain text from an HTML input stream and wraps it in an extraction result.<p>
 *
 * An empty <code>encoding</code> is replaced by the system default encoding. The text
 * obtained from the HTML extractor is then passed through <code>removeControlChars</code>.
 * Any exception raised while doing so is logged at error level and not rethrown; the
 * returned result then wraps whatever text had been obtained up to that point (the empty
 * String if none).<p>
 *
 * @param in the stream providing the HTML content
 * @param encoding the encoding to use, if empty the system default encoding is used
 *
 * @return the extraction result wrapping the extracted text
 *
 * @throws Exception declared by the implemented interface method
 *
 * @see org.opencms.search.extractors.I_CmsTextExtractor#extractText(java.io.InputStream, java.lang.String)
 */
@Override
public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

    String text = "";
    try {
        String effectiveEncoding = CmsStringUtil.isEmpty(encoding)
            ? OpenCms.getSystemInfo().getDefaultEncoding()
            : encoding;
        // two separate assignments on purpose: if the cleanup step fails,
        // the uncleaned extracted text is still what gets returned
        text = CmsHtmlExtractor.extractText(in, effectiveEncoding);
        text = removeControlChars(text);
    } catch (Exception extractionFailure) {
        // best effort: report the problem and fall through to the return below
        if (LOG.isErrorEnabled()) {
            LOG.error(Messages.get().container(Messages.LOG_EXTRACT_TEXT_ERROR_0), extractionFailure);
        }
    }
    return new CmsExtractionResult(text);
}
}