try { originalSource = textExtractor.extractText(content1).getContent(); copySource = textExtractor.extractText(content2).getContent(); } catch (Exception e) {
/** * Returns the raw text content of a given vfs resource containing MS Excel data.<p> * * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex) */ public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index) throws CmsIndexException, CmsException { CmsFile file = readFile(cms, resource); try { return CmsExtractorMsExcel.getExtractor().extractText(file.getContents()); } catch (Exception e) { if (e instanceof FileNotFoundException) { if ((e.getMessage() != null) && (e.getMessage().indexOf("Workbook") > 0)) { // special case: catch Excel95 format error throw new CmsIndexException(Messages.get().container( Messages.ERR_NO_EXCEL_FORMAT_1, resource.getRootPath()), e); } } throw new CmsIndexException( Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e); } }
/**
 * Extracts the text content of the given VFS resource using the MS Word extractor.<p>
 *
 * Any exception raised during extraction is rethrown as a <code>CmsIndexException</code>
 * that carries the root path of the resource and the original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index not used by this implementation
 *
 * @return the result returned by the MS Word extractor
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    CmsFile wordFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorMsWord.getExtractor().extractText(wordFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the RTF extractor.<p>
 *
 * Any exception raised during extraction is rethrown as a <code>CmsIndexException</code>
 * that carries the root path of the resource and the original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index not used by this implementation
 *
 * @return the result returned by the RTF extractor
 *
 * @throws CmsException if the text extraction fails (thrown as <code>CmsIndexException</code>);
 *         presumably also if the file cannot be read
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsException {

    CmsFile rtfFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorRtf.getExtractor().extractText(rtfFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the MS PowerPoint extractor.<p>
 *
 * Any exception raised during extraction is rethrown as a <code>CmsIndexException</code>
 * that carries the root path of the resource and the original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index not used by this implementation
 *
 * @return the result returned by the MS PowerPoint extractor
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    CmsFile slidesFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorMsPowerPoint.getExtractor().extractText(slidesFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the OpenOffice extractor.<p>
 *
 * Any exception raised during extraction is rethrown as a <code>CmsIndexException</code>
 * that carries the root path of the resource and the original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index not used by this implementation
 *
 * @return the result returned by the OpenOffice extractor
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    CmsFile officeFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorOpenOffice.getExtractor().extractText(officeFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the MS Office OLE2 extractor.<p>
 *
 * The resource and index are first handed to <code>logContentExtraction</code>, then the
 * file is read and extracted. Any <code>Throwable</code> raised during extraction (errors
 * included, not only exceptions) is rethrown as a <code>CmsIndexException</code> that
 * carries the root path of the resource and the original throwable as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index the index, passed on to <code>logContentExtraction</code>
 *
 * @return the result returned by the MS Office OLE2 extractor
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    logContentExtraction(resource, index);
    CmsFile officeFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorMsOfficeOLE2.getExtractor().extractText(officeFile.getContents());
    } catch (Throwable failure) {
        // Throwable is caught on purpose here, so errors are wrapped as well
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the MS Office OOXML extractor.<p>
 *
 * The resource and index are first handed to <code>logContentExtraction</code>, then the
 * file is read and extracted. Any exception raised during extraction is rethrown as a
 * <code>CmsIndexException</code> that carries the root path of the resource and the
 * original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index the index, passed on to <code>logContentExtraction</code>
 *
 * @return the result returned by the MS Office OOXML extractor
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    logContentExtraction(resource, index);
    CmsFile officeFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorMsOfficeOOXML.getExtractor().extractText(officeFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the OpenOffice extractor.<p>
 *
 * The resource and index are first handed to <code>logContentExtraction</code>, then the
 * file is read and extracted. Any exception raised during extraction is rethrown as a
 * <code>CmsIndexException</code> that carries the root path of the resource and the
 * original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index the index, passed on to <code>logContentExtraction</code>
 *
 * @return the result returned by the OpenOffice extractor
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    logContentExtraction(resource, index);
    CmsFile officeFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorOpenOffice.getExtractor().extractText(officeFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the RTF extractor.<p>
 *
 * The resource and index are first handed to <code>logContentExtraction</code>, then the
 * file is read and extracted. Any exception raised during extraction is rethrown as a
 * <code>CmsIndexException</code> that carries the root path of the resource and the
 * original exception as cause.<p>
 *
 * @param cms the context passed to <code>readFile</code>
 * @param resource the resource to extract the text from
 * @param index the index, passed on to <code>logContentExtraction</code>
 *
 * @return the result returned by the RTF extractor
 *
 * @throws CmsException if the text extraction fails (thrown as <code>CmsIndexException</code>);
 *         presumably also if the file cannot be read
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsException {

    logContentExtraction(resource, index);
    CmsFile rtfFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorRtf.getExtractor().extractText(rtfFile.getContents());
    } catch (Exception failure) {
        // wrap every extraction problem, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the HTML extractor.<p>
 *
 * The encoding handed to the extractor is the value of the content-encoding property of
 * the resource (read with the search flag set to <code>true</code>), falling back to the
 * system default encoding. Any exception raised while reading that property or during
 * extraction is rethrown as a <code>CmsIndexException</code> that carries the root path
 * of the resource and the original exception as cause.<p>
 *
 * @param cms the context used to read the file and the encoding property
 * @param resource the resource to extract the text from
 * @param index not used by this implementation
 *
 * @return the result returned by the HTML extractor
 *
 * @throws CmsIndexException if reading the encoding property or the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    CmsFile htmlFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        // resource specific encoding, with the system default as fallback
        String charset = cms.readPropertyObject(
            resource,
            CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
            true).getValue(OpenCms.getSystemInfo().getDefaultEncoding());
        extracted = CmsExtractorHtml.getExtractor().extractText(htmlFile.getContents(), charset);
    } catch (Exception failure) {
        // wrap every problem from the block above, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
/**
 * Extracts the text content of the given VFS resource using the HTML extractor.<p>
 *
 * The resource and index are first handed to <code>logContentExtraction</code>. The
 * encoding handed to the extractor is the value of the content-encoding property of the
 * resource (read with the search flag set to <code>true</code>), falling back to the
 * system default encoding. Any exception raised while reading that property or during
 * extraction is rethrown as a <code>CmsIndexException</code> that carries the root path
 * of the resource and the original exception as cause.<p>
 *
 * @param cms the context used to read the file and the encoding property
 * @param resource the resource to extract the text from
 * @param index the index, passed on to <code>logContentExtraction</code>
 *
 * @return the result returned by the HTML extractor
 *
 * @throws CmsIndexException if reading the encoding property or the text extraction fails
 * @throws CmsException presumably if the file cannot be read (raised outside the code visible here)
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
    throws CmsIndexException, CmsException {

    logContentExtraction(resource, index);
    CmsFile htmlFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        // resource specific encoding, with the system default as fallback
        String charset = cms.readPropertyObject(
            resource,
            CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
            true).getValue(OpenCms.getSystemInfo().getDefaultEncoding());
        extracted = CmsExtractorHtml.getExtractor().extractText(htmlFile.getContents(), charset);
    } catch (Exception failure) {
        // wrap every problem from the block above, keeping the original exception as cause
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            failure);
    }
    return extracted;
}
try { originalSource = textExtractor.extractText(content1).getContent(); copySource = textExtractor.extractText(content2).getContent(); } catch (Exception e) {
originalSource = textExtractor.extractText(content1).getContent(); copySource = textExtractor.extractText(content2).getContent(); } else if ((resourceType instanceof CmsResourceTypePlain) || (resourceType instanceof CmsResourceTypeJsp)) { originalSource = new String(content1);
/** * Returns the raw text content of a given vfs resource containing Adobe PDF data.<p> * * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex) */ public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index) throws CmsIndexException, CmsException { CmsFile file = readFile(cms, resource); try { return CmsExtractorPdf.getExtractor().extractText(file.getContents()); } catch (Exception e) { if (e instanceof CryptographyException) { throw new CmsIndexException(Messages.get().container( Messages.ERR_DECRYPTING_RESOURCE_1, resource.getRootPath()), e); } if (e instanceof InvalidPasswordException) { // default password "" was wrong. throw new CmsIndexException(Messages.get().container( Messages.ERR_PWD_PROTECTED_1, resource.getRootPath()), e); } throw new CmsIndexException( Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e); } }
/** * Returns the raw text content of a given vfs resource containing Adobe PDF data.<p> * * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex) */ public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index) throws CmsIndexException, CmsException { logContentExtraction(resource, index); CmsFile file = readFile(cms, resource); try { return CmsExtractorPdf.getExtractor().extractText(file.getContents()); } catch (Exception e) { if (e instanceof CryptographyException) { throw new CmsIndexException( Messages.get().container(Messages.ERR_DECRYPTING_RESOURCE_1, resource.getRootPath()), e); } if (e instanceof InvalidPasswordException) { // default password "" was wrong. throw new CmsIndexException( Messages.get().container(Messages.ERR_PWD_PROTECTED_1, resource.getRootPath()), e); } throw new CmsIndexException( Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e); } }