private void parse(SummaryInformation summary) { set(TikaCoreProperties.TITLE, summary.getTitle()); addMulti(metadata, TikaCoreProperties.CREATOR, summary.getAuthor()); //make sure these are retrievable specifically add(Office.KEYWORDS, summary.getKeywords()); add(OfficeOpenXMLCore.SUBJECT, summary.getSubject()); set(TikaCoreProperties.MODIFIER, summary.getLastAuthor()); set(TikaCoreProperties.COMMENTS, summary.getComments()); set(OfficeOpenXMLExtended.TEMPLATE, summary.getTemplate()); set(OfficeOpenXMLExtended.APPLICATION, summary.getApplicationName()); set(OfficeOpenXMLCore.REVISION, summary.getRevNumber()); set(TikaCoreProperties.CREATED, summary.getCreateDateTime()); set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime()); set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted()); set(OfficeOpenXMLExtended.TOTAL_TIME, Long.toString(summary.getEditTime())); set(OfficeOpenXMLExtended.DOC_SECURITY, summary.getSecurity()); // New style counts set(Office.WORD_COUNT, summary.getWordCount()); set(Office.CHARACTER_COUNT, summary.getCharCount()); set(Office.PAGE_COUNT, summary.getPageCount()); if (summary.getPageCount() > 0) { metadata.set(PagedText.N_PAGES, summary.getPageCount()); } }
/**
 * Factory method that builds a {@link SummaryInformation} using its
 * no-argument constructor.
 *
 * @return the newly constructed summary information
 */
public static SummaryInformation newSummaryInformation() {
    final SummaryInformation created = new SummaryInformation();
    return created;
}
// Copies values from the summary information `si` into `props` under DCMetaData keys.
// String values are stored only when non-null and non-empty; date values only when non-null.
// NOTE(review): DCMetaData.DATE is put twice (creation date, then last-save date) and
// DCMetaData.SUBJECT is put twice (keywords, then subject). If `props` is a plain Map the
// later put replaces the earlier one - confirm that this is intended.
// The final `if` (title check) has its body outside this excerpt.
if (si.getLastAuthor() != null && si.getLastAuthor().length() > 0) props.put(DCMetaData.CONTRIBUTOR, si.getLastAuthor()); if (si.getComments() != null && si.getComments().length() > 0) props.put(DCMetaData.DESCRIPTION, si.getComments()); if (si.getCreateDateTime() != null) props.put(DCMetaData.DATE, si.getCreateDateTime()); if (si.getAuthor() != null && si.getAuthor().length() > 0) props.put(DCMetaData.CREATOR, si.getAuthor()); if (si.getKeywords() != null && si.getKeywords().length() > 0) props.put(DCMetaData.SUBJECT, si.getKeywords()); if (si.getLastSaveDateTime() != null) props.put(DCMetaData.DATE, si.getLastSaveDateTime()); if (si.getSubject() != null && si.getSubject().length() > 0) props.put(DCMetaData.SUBJECT, si.getSubject()); if (si.getTitle() != null && si.getTitle().length() > 0)
// Wraps the property set `ps` in a SummaryInformation and copies its values onto `ph`
// through the matching setters: title, subject, author, keywords, comments, template,
// last author, revision, creation date, last-saved date, application name, editing time
// and last-printed date.
// The revision value is converted with NumberHelper.parseInteger before being stored.
// NOTE(review): getEditTime() is narrowed with an (int) cast; if it returns a wider type,
// values outside the int range would be truncated - confirm the expected range.
SummaryInformation summaryInformation = new SummaryInformation(ps); ph.setProjectTitle(summaryInformation.getTitle()); ph.setSubject(summaryInformation.getSubject()); ph.setAuthor(summaryInformation.getAuthor()); ph.setKeywords(summaryInformation.getKeywords()); ph.setComments(summaryInformation.getComments()); ph.setTemplate(summaryInformation.getTemplate()); ph.setLastAuthor(summaryInformation.getLastAuthor()); ph.setRevision(NumberHelper.parseInteger(summaryInformation.getRevNumber())); ph.setCreationDate(summaryInformation.getCreateDateTime()); ph.setLastSaved(summaryInformation.getLastSaveDateTime()); ph.setShortApplicationName(summaryInformation.getApplicationName()); ph.setEditingTime(Integer.valueOf((int) summaryInformation.getEditTime())); ph.setLastPrinted(summaryInformation.getLastPrinted());
/**
 * Forwards the title, author, keywords and comments of the given summary
 * information to {@code foDocumentFacade}. A value is forwarded only when
 * {@code ExcelToFoUtils.isNotEmpty} accepts it.
 *
 * @param summaryInformation the summary information to read from
 */
protected void processDocumentInformation(SummaryInformation summaryInformation) {
    final String title = summaryInformation.getTitle();
    if (ExcelToFoUtils.isNotEmpty(title)) {
        foDocumentFacade.setTitle(title);
    }

    final String author = summaryInformation.getAuthor();
    if (ExcelToFoUtils.isNotEmpty(author)) {
        foDocumentFacade.setCreator(author);
    }

    final String keywords = summaryInformation.getKeywords();
    if (ExcelToFoUtils.isNotEmpty(keywords)) {
        foDocumentFacade.setKeywords(keywords);
    }

    // Comments are published as the document description.
    final String comments = summaryInformation.getComments();
    if (ExcelToFoUtils.isNotEmpty(comments)) {
        foDocumentFacade.setDescription(comments);
    }
}
/**
 * Loads a Word document, prints its author, "application:title" line and the
 * thumbnail's clipboard format and format tag to standard output, and returns
 * the thumbnail as WMF data.
 *
 * @param docFile the document file to load
 * @return the result of {@code Thumbnail.getThumbnailAsWMF()}
 * @throws Exception if loading the document or reading the thumbnail fails
 */
static byte[] process(File docFile) throws Exception {
    final HWPFDocumentCore document = AbstractWordUtils.loadDoc(docFile);
    final SummaryInformation info = document.getSummaryInformation();

    System.out.println(info.getAuthor());
    final String applicationAndTitle = info.getApplicationName() + ":" + info.getTitle();
    System.out.println(applicationAndTitle);

    final Thumbnail preview = new Thumbnail(info.getThumbnail());
    System.out.println(preview.getClipboardFormat());
    System.out.println(preview.getClipboardFormatTag());

    return preview.getThumbnailAsWMF();
}
/**
 * {@inheritDoc}
 * <p>
 * Reads the summary-information stream of the POIFS file system built from
 * the input, stores title, creator and keywords in the context when they are
 * non-null, and finally stores the extracted text.
 */
@Override
public void parse(InputStream inputStream, long filesize, ContentParserOptions options,
        MutableGenericContext context) throws Exception {
    final POIFSFileSystem fileSystem = new POIFSFileSystem(inputStream);

    // Open the default summary stream and turn it into a property set.
    final InputStream summaryStream =
            fileSystem.createDocumentInputStream(SummaryInformation.DEFAULT_STREAM_NAME);
    final SummaryInformation summary = (SummaryInformation) PropertySetFactory.create(summaryStream);

    final String documentTitle = summary.getTitle();
    if (documentTitle != null) {
        context.setVariable(VARIABLE_NAME_TITLE, documentTitle);
    }

    final String creator = summary.getAuthor();
    if (creator != null) {
        context.setVariable(VARIABLE_NAME_CREATOR, creator);
    }

    final String keywordList = summary.getKeywords();
    if (keywordList != null) {
        context.setVariable(VARIABLE_NAME_KEYWORDS, keywordList);
    }

    // The text variable is always set, whatever metadata was found.
    context.setVariable(VARIABLE_NAME_TEXT, extractText(fileSystem, filesize, options));
}
/**
 * Extracts metadata and paragraph text from the stream with a
 * {@code Word6Extractor} and records them in the result builder.
 *
 * @param inputStream   the stream handed to the extractor
 * @param resultBuilder receives the meta fields and the new document
 * @throws IOException if the extraction fails
 */
private void oldWordExtraction(final InputStream inputStream, final ParserResultBuilder resultBuilder)
        throws IOException {
    Word6Extractor extractor = null;
    try {
        extractor = new Word6Extractor(inputStream);

        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

        // Summary information may be absent; only then are the three fields skipped.
        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
        }

        final ParserFieldsBuilder document = resultBuilder.newDocument();
        @SuppressWarnings("deprecation")
        final String[] paragraphs = extractor.getParagraphText();
        if (paragraphs != null) {
            for (int i = 0; i < paragraphs.length; i++) {
                document.add(CONTENT, paragraphs[i]);
            }
        }
        document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
    } finally {
        // Always release the extractor, even if construction or extraction failed.
        IOUtils.closeQuietly(extractor);
    }
}
// Builds a SummaryInformation from the event's stream via PropertySetFactory.create and
// reads its title and last author into locals.
// NOTE(review): the local `Author` is assigned from getLastAuthor(), not getAuthor() -
// confirm that this is intended.
// The trailing "......" presumably marks code elided from this excerpt.
SummaryInformation si = (SummaryInformation) PropertySetFactory.create(event.getStream()); String title = si.getTitle(); String Author= si.getLastAuthor(); ......
/**
 * Reads the author from the summary-information stream of an OLE2 file.
 * <p>
 * This is a best-effort lookup: if the file cannot be read or its summary
 * stream cannot be parsed, {@code null} is returned instead of an exception.
 *
 * @param file the OLE2 file to inspect
 * @return the value of {@code SummaryInformation.getAuthor()}, or {@code null}
 *         when the file could not be read or parsed
 */
public static String parseOLE2FileAuthor(File file) {
    String author = null;
    // try-with-resources closes the streams on every path; previously the file
    // stream stayed open whenever parsing threw before reaching stream.close().
    try (FileInputStream stream = new FileInputStream(file)) {
        POIFSFileSystem poifs = new POIFSFileSystem(stream);
        DirectoryEntry dir = poifs.getRoot();
        DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
        try (DocumentInputStream dis = new DocumentInputStream(siEntry)) {
            PropertySet ps = new PropertySet(dis);
            SummaryInformation si = new SummaryInformation(ps);
            author = si.getAuthor();
        }
    } catch (IOException | NoPropertySetStreamException | MarkUnsupportedException
            | UnexpectedPropertySetTypeException ignored) {
        // Deliberately ignored: callers receive null when the author cannot be
        // determined. The former ex.getStackTrace() calls discarded their result
        // and had no effect, so dropping them changes nothing observable.
    }
    return author;
// Prints four labelled lines to standard output: the application name and OS version
// taken from doc.getSummaryInformation(), then the paragraph count and byte count taken
// from doc.getDocumentSummaryInformation().
// NOTE(review): both accessor results are dereferenced without a null check - confirm
// they cannot be null for `doc`.
System.out.println("ApplicationName: "+doc.getSummaryInformation().getApplicationName()); System.out.println("OSVersion: "+doc.getSummaryInformation().getOSVersion()); System.out.println("# paragraphs: "+doc.getDocumentSummaryInformation().getParCount()); System.out.println("# bytes: "+doc.getDocumentSummaryInformation().getByteCount());
/**
 * Reads the summary information from the event's stream and prints the
 * document title, or a notice that there is none, to standard output.
 *
 * @param event the reader event supplying the stream, path and name
 * @throws RuntimeException if the stream cannot be turned into a
 *         {@code SummaryInformation}; the original failure is attached as the cause
 */
@Override
public void processPOIFSReaderEvent(final POIFSReaderEvent event) {
    SummaryInformation si;
    try {
        si = (SummaryInformation) PropertySetFactory.create(event.getStream());
    } catch (Exception ex) {
        // Message is unchanged; ex is now also passed as the cause so its
        // stack trace is no longer lost.
        throw new RuntimeException("Property set stream \"" + event.getPath() + event.getName() + "\": " + ex, ex);
    }
    final String title = si.getTitle();
    if (title != null) {
        System.out.println("Title: \"" + title + "\"");
    } else {
        System.out.println("Document has no title.");
    }
}
// Closes an enclosing scope that was opened before this excerpt.
}
/**
 * Removes the comments from the given summary information and records the
 * removal as a threat on the session. Does nothing when the comments are
 * {@code null} or empty.
 *
 * @param session the session on which the threat is recorded
 * @param dsi     the summary information whose comments are inspected and removed
 */
protected void sanitizeComments(BleachSession session, SummaryInformation dsi) {
    final String comments = dsi.getComments();
    final boolean hasComments = comments != null && !comments.isEmpty();
    if (!hasComments) {
        return;
    }

    LOGGER.trace("Removing the document's Comments (was '{}')", comments);
    dsi.removeComments();

    // Keep the removed text in the threat details.
    final String details = "Comment was: '" + comments + "'";
    final Threat removedComment = Threat.builder()
            .type(ThreatType.UNRECOGNIZED_CONTENT)
            .severity(ThreatSeverity.LOW)
            .action(ThreatAction.REMOVE)
            .location("Summary Information - Comment")
            .details(details)
            .build();
    session.recordThreat(removedComment);
}
// Reads title, keywords, subject, comments and author from m_summary and puts each into
// metaInfo when CmsStringUtil.isNotEmpty accepts it; then reads the creation and last-save
// dates and stores the last-save date as META_DATE_LASTMODIFIED when its time is positive.
// NOTE(review): in this excerpt none of the `if` blocks is closed, so each check is nested
// inside the previous one, and the creation-date branch stores nothing before `date` is
// reassigned. This looks like an excerpt with lines dropped - verify against the full source.
meta = m_summary.getTitle(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_TITLE, meta); meta = m_summary.getKeywords(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_KEYWORDS, meta); meta = m_summary.getSubject(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_SUBJECT, meta); meta = m_summary.getComments(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_COMMENTS, meta); meta = m_summary.getAuthor(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_AUTHOR, meta); date = m_summary.getCreateDateTime(); if ((date != null) && (date.getTime() > 0)) { date = m_summary.getLastSaveDateTime(); if ((date != null) && (date.getTime() > 0)) { metaInfo.put(I_CmsExtractionResult.META_DATE_LASTMODIFIED, date);
// Wraps the property set `ps` in a SummaryInformation and copies its values onto `ph`
// through the matching setters: title, subject, author, keywords, comments, template,
// last author, revision, creation date, last-saved date, application name, editing time
// and last-printed date.
// The revision value is converted with NumberHelper.parseInteger before being stored.
// NOTE(review): getEditTime() is narrowed with an (int) cast; if it returns a wider type,
// values outside the int range would be truncated - confirm the expected range.
SummaryInformation summaryInformation = new SummaryInformation(ps); ph.setProjectTitle(summaryInformation.getTitle()); ph.setSubject(summaryInformation.getSubject()); ph.setAuthor(summaryInformation.getAuthor()); ph.setKeywords(summaryInformation.getKeywords()); ph.setComments(summaryInformation.getComments()); ph.setTemplate(summaryInformation.getTemplate()); ph.setLastAuthor(summaryInformation.getLastAuthor()); ph.setRevision(NumberHelper.parseInteger(summaryInformation.getRevNumber())); ph.setCreationDate(summaryInformation.getCreateDateTime()); ph.setLastSaved(summaryInformation.getLastSaveDateTime()); ph.setShortApplicationName(summaryInformation.getApplicationName()); ph.setEditingTime(Integer.valueOf((int) summaryInformation.getEditTime())); ph.setLastPrinted(summaryInformation.getLastPrinted());
/**
 * Publishes title, author, keywords and comments from the given summary
 * information on {@code foDocumentFacade}. Each value is published only
 * when {@code AbstractExcelUtils.isNotEmpty} accepts it.
 *
 * @param summaryInformation the summary information to read from
 */
protected void processDocumentInformation(SummaryInformation summaryInformation) {
    final String documentTitle = summaryInformation.getTitle();
    if (AbstractExcelUtils.isNotEmpty(documentTitle)) {
        foDocumentFacade.setTitle(documentTitle);
    }

    final String creator = summaryInformation.getAuthor();
    if (AbstractExcelUtils.isNotEmpty(creator)) {
        foDocumentFacade.setCreator(creator);
    }

    final String keywordList = summaryInformation.getKeywords();
    if (AbstractExcelUtils.isNotEmpty(keywordList)) {
        foDocumentFacade.setKeywords(keywordList);
    }

    // The comments field is used as the description.
    final String description = summaryInformation.getComments();
    if (AbstractExcelUtils.isNotEmpty(description)) {
        foDocumentFacade.setDescription(description);
    }
}
// Prints four labelled lines to standard output: the application name and OS version
// taken from doc.getSummaryInformation(), then the paragraph count and byte count taken
// from doc.getDocumentSummaryInformation().
// NOTE(review): both accessor results are dereferenced without a null check - confirm
// they cannot be null for `doc`.
System.out.println("ApplicationName: "+doc.getSummaryInformation().getApplicationName()); System.out.println("OSVersion: "+doc.getSummaryInformation().getOSVersion()); System.out.println("# paragraphs: "+doc.getDocumentSummaryInformation().getParCount()); System.out.println("# bytes: "+doc.getDocumentSummaryInformation().getByteCount());
/**
 * Copies the values of the given summary information into this object's
 * fields.
 *
 * @param si the summary information to copy from
 */
public void setSummaryInformation(SummaryInformation si) {
    // Descriptive text fields.
    title = si.getTitle();
    subject = si.getSubject();
    author = si.getAuthor();
    keywords = si.getKeywords();
    comment = si.getComments();
    template = si.getTemplate();
    creatingApplication = si.getApplicationName();

    // Revision and timing information.
    revision = si.getRevNumber();
    created = si.getCreateDateTime();
    lastSaved = si.getLastSaveDateTime();
    lastPrinted = si.getLastPrinted();
    totalEditingTime = si.getEditTime();

    // Document statistics.
    pages = si.getPageCount();
    words = si.getWordCount();
    characters = si.getCharCount();

    thumbnail = si.getThumbnail();
}