/**
 * Reads the paragraphs of the document stored under {@code root} through a
 * {@code Word6Extractor} and writes each one to the handler as a {@code p}
 * element, in the order the extractor returns them.
 *
 * @param root  directory node passed to the {@code HWPFOldDocument} constructor
 * @param xhtml handler that receives one {@code p} element per paragraph
 * @throws IOException   declared by the signature; propagated from the calls made here
 * @throws SAXException  declared by the signature; propagated from the calls made here
 * @throws TikaException declared by the signature; propagated from the calls made here
 */
protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    // Build the document and extractor in one expression; only the paragraphs are needed.
    final String[] paragraphs =
            new Word6Extractor(new HWPFOldDocument(root)).getParagraphText();

    for (final String paragraph : paragraphs) {
        xhtml.element("p", paragraph);
    }
}
public String getText() { try { WordToTextConverter wordToTextConverter = new WordToTextConverter(); wordToTextConverter.processDocument( doc ); return wordToTextConverter.getText(); } catch ( Exception exc ) { // fall-back StringBuffer text = new StringBuffer(); for ( String t : getParagraphText() ) { text.append( t ); } return text.toString(); } } }
public String getText() { try { WordToTextConverter wordToTextConverter = new WordToTextConverter(); wordToTextConverter.processDocument( doc ); return wordToTextConverter.getText(); } catch ( Exception exc ) { // fall-back StringBuffer text = new StringBuffer(); for ( String t : getParagraphText() ) { text.append( t ); } return text.toString(); } } }
/**
 * Reads the paragraphs of the document stored under {@code root} through a
 * {@code Word6Extractor} and writes each one to the handler as a {@code p}
 * element, in the order the extractor returns them.
 *
 * @param root  directory node passed to the {@code HWPFOldDocument} constructor
 * @param xhtml handler that receives one {@code p} element per paragraph
 * @throws IOException   declared by the signature; propagated from the calls made here
 * @throws SAXException  declared by the signature; propagated from the calls made here
 * @throws TikaException declared by the signature; propagated from the calls made here
 */
protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    // Build the document and extractor in one expression; only the paragraphs are needed.
    final String[] paragraphs =
            new Word6Extractor(new HWPFOldDocument(root)).getParagraphText();

    for (final String paragraph : paragraphs) {
        xhtml.element("p", paragraph);
    }
}
/**
 * Reads the paragraphs of the document stored under {@code root} through a
 * {@code Word6Extractor} and writes each one to the handler as a {@code p}
 * element, in the order the extractor returns them.
 *
 * @param root  directory node passed to the {@code HWPFOldDocument} constructor
 * @param xhtml handler that receives one {@code p} element per paragraph
 * @throws IOException   declared by the signature; propagated from the calls made here
 * @throws SAXException  declared by the signature; propagated from the calls made here
 * @throws TikaException declared by the signature; propagated from the calls made here
 */
protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    // Build the document and extractor in one expression; only the paragraphs are needed.
    final String[] paragraphs =
            new Word6Extractor(new HWPFOldDocument(root)).getParagraphText();

    for (final String paragraph : paragraphs) {
        xhtml.element("p", paragraph);
    }
}
/**
 * Extracts metadata and paragraph content from the given stream using a
 * {@code Word6Extractor} and records them on the result builder.
 *
 * <p>Steps, in order: the MIME type is set on the metas builder; when summary
 * information is available, title, author and subject are added; a new
 * document is created and every paragraph is added to it under
 * {@code CONTENT}; finally the result of {@code languageDetection} over that
 * content is added under {@code LANG_DETECTION}. The extractor is always
 * passed to {@code IOUtils.closeQuietly} on exit.
 *
 * @param inputStream   stream handed to the {@code Word6Extractor} constructor
 * @param resultBuilder builder that receives the metas and the new document
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private void oldWordExtraction(final InputStream inputStream, final ParserResultBuilder resultBuilder)
        throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        // Document-level metadata.
        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
        }

        // Paragraph content, one CONTENT entry per paragraph.
        final ParserFieldsBuilder contentFields = resultBuilder.newDocument();
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        if (paragraphs != null) {
            for (final String paragraph : paragraphs) {
                contentFields.add(CONTENT, paragraph);
            }
        }

        contentFields.add(LANG_DETECTION, languageDetection(contentFields, CONTENT, 10000));
    } finally {
        // The extractor is still null here if its constructor threw.
        IOUtils.closeQuietly(extractor);
    }
}