/**
 * Factory method for a new document summary information object.
 *
 * @return a newly constructed {@link DocumentSummaryInformation}
 */
public static DocumentSummaryInformation newDocumentSummaryInformation() {
    final DocumentSummaryInformation created = new DocumentSummaryInformation();
    return created;
}
}
/**
 * Renders the document summary information as text: first the output of
 * {@code getPropertiesText} for the standard properties, then one
 * {@code "name = value"} line per custom property.
 *
 * @return the rendered text, or the empty string when there is no document
 */
public String getDocumentSummaryInformationText() {
    // Event based extractors do not have a document to read from.
    if (document == null) {
        return "";
    }

    final DocumentSummaryInformation summary = document.getDocumentSummaryInformation();
    final StringBuilder out = new StringBuilder();

    // Standard properties come first.
    out.append(getPropertiesText(summary));

    // Custom properties can only be looked up when a summary exists.
    CustomProperties custom = null;
    if (summary != null) {
        custom = summary.getCustomProperties();
    }
    if (custom == null) {
        return out.toString();
    }

    for (String name : custom.nameSet()) {
        final String rendered = getPropertyValueText(custom.get(name));
        out.append(name).append(" = ").append(rendered).append("\n");
    }
    return out.toString();
}
public String getSummaryInformationText() {
/**
 * Builds a {@link CustomProperties} view of the second section.
 * <p>
 * The codepage property is copied onto the result; every property whose ID
 * is greater than {@code PropertyIDMap.PID_CODEPAGE} is wrapped in a
 * {@link CustomProperty} named via the section dictionary.
 *
 * @return the custom properties, or {@code null} when there are fewer than
 *         two sections
 */
public CustomProperties getCustomProperties() {
    if (getSectionCount() < 2) {
        return null;
    }

    final CustomProperties result = new CustomProperties();
    final Section customSection = getSections().get(1);
    final Map<Long,String> names = customSection.getDictionary();
    final Property[] sectionProperties = customSection.getProperties();

    int customCount = 0;
    for (final Property current : sectionProperties) {
        final long pid = current.getID();
        if (pid == PropertyIDMap.PID_CODEPAGE) {
            result.setCodepage((Integer) current.getValue());
            continue;
        }
        if (pid > PropertyIDMap.PID_CODEPAGE) {
            customCount++;
            final CustomProperty wrapped = new CustomProperty(current, names.get(pid));
            result.put(wrapped.getName(), wrapped);
        }
    }

    // Fewer stored entries than counted properties means some were lost on
    // put (presumably name collisions); flag the result as not pure.
    final boolean allStored = (result.size() == customCount);
    if (!allStored) {
        result.setPure(false);
    }
    return result;
}
/**
 * Removes the custom properties by dropping the second section and
 * re-adding every other section in its original order.
 *
 * @throws HPSFRuntimeException if there are fewer than two sections
 */
public void removeCustomProperties() {
    final boolean hasSecondSection = getSectionCount() >= 2;
    if (!hasSecondSection) {
        throw new HPSFRuntimeException("Illegal internal format of Document SummaryInformation stream: second section is missing.");
    }

    // Copy the sections before clearSections() is called, then rebuild.
    final List<Section> snapshot = new LinkedList<>(getSections());
    clearSections();

    int position = 0;
    for (final Section candidate : snapshot) {
        final boolean isSecondSection = (position == 1);
        position++;
        if (isSecondSection) {
            continue;
        }
        addSection(candidate);
    }
}
/**
 * Adds a section with the {@code USER_DEFINED_PROPERTIES} format ID when
 * fewer than two sections exist; does nothing otherwise.
 */
private void ensureSection2() {
    if (getSectionCount() >= 2) {
        return;
    }
    final Section userDefined = new Section();
    userDefined.setFormatID(USER_DEFINED_PROPERTIES);
    addSection(userDefined);
}
/** * Sets the custom properties. * * @param customProperties The custom properties */ public void setCustomProperties(final CustomProperties customProperties) { ensureSection2(); final Section section = getSections().get(1); final Map<Long,String> dictionary = customProperties.getDictionary(); // section.clear(); /* Set the codepage. If both custom properties and section have a * codepage, the codepage from the custom properties wins, else take the * one that is defined. If none is defined, take ISO-8859-1. */ int cpCodepage = customProperties.getCodepage(); if (cpCodepage < 0) { cpCodepage = section.getCodepage(); } if (cpCodepage < 0) { cpCodepage = Property.DEFAULT_CODEPAGE; } customProperties.setCodepage(cpCodepage); section.setCodepage(cpCodepage); section.setDictionary(dictionary); for (CustomProperty p : customProperties.properties()) { section.setProperty(p); } }
DocumentSummaryInformation documentSummaryInformation = ps == null ? new DocumentSummaryInformation() : new DocumentSummaryInformation(ps); ph.setCategory(documentSummaryInformation.getCategory()); ph.setPresentationFormat(documentSummaryInformation.getPresentationFormat()); ph.setManager(documentSummaryInformation.getManager()); ph.setCompany(documentSummaryInformation.getCompany()); ph.setContentType(documentSummaryInformation.getContentType()); ph.setContentStatus(documentSummaryInformation.getContentStatus()); ph.setLanguage(documentSummaryInformation.getLanguage()); ph.setDocumentVersion(documentSummaryInformation.getDocumentVersion()); CustomProperties customProperties = documentSummaryInformation.getCustomProperties(); if (customProperties != null)
private void parse(DocumentSummaryInformation summary) { set(OfficeOpenXMLExtended.COMPANY, summary.getCompany()); addMulti(metadata, OfficeOpenXMLExtended.MANAGER, summary.getManager()); set(TikaCoreProperties.LANGUAGE, getLanguage(summary)); set(OfficeOpenXMLCore.CATEGORY, summary.getCategory()); // New style counts set(Office.SLIDE_COUNT, summary.getSlideCount()); if (summary.getSlideCount() > 0) { metadata.set(PagedText.N_PAGES, summary.getSlideCount()); } parse(summary.getCustomProperties()); }
System.out.println("ApplicationName: "+doc.getSummaryInformation().getApplicationName()); System.out.println("OSVersion: "+doc.getSummaryInformation().getOSVersion()); System.out.println("# paragraphs: "+doc.getDocumentSummaryInformation().getParCount()); System.out.println("# bytes: "+doc.getDocumentSummaryInformation().getByteCount()); System.out.println("# hidden: "+doc.getDocumentSummaryInformation().getHiddenCount()); System.out.println("# lines: "+doc.getDocumentSummaryInformation().getLineCount()); System.out.println("# mmclips: "+doc.getDocumentSummaryInformation().getMMClipCount()); System.out.println("# notes: "+doc.getDocumentSummaryInformation().getNoteCount()); System.out.println("# sections: "+doc.getDocumentSummaryInformation().getSectionCount()); System.out.println("# slides: "+doc.getDocumentSummaryInformation().getSlideCount()); System.out.println("format: "+doc.getDocumentSummaryInformation().getFormat()); for( TextPiece tp : doc.getTextTable().getTextPieces() ) { System.out.println("TP: "+tp.getStringBuffer().substring(0, 100)); System.out.println("TP: "+tp.getPieceDescriptor().isUnicode()); for( Object os : doc.getDocumentSummaryInformation().getSections() ) { Section s = (Section) os; System.out.println("ss# fid: "+s.getFormatID());
/**
 * Creates an empty {@link DocumentSummaryInformation}.
 * <p>
 * The format ID of the first section is set to
 * {@code DOC_SUMMARY_INFORMATION}.
 */
public DocumentSummaryInformation() {
    getFirstSection().setFormatID(DOC_SUMMARY_INFORMATION);
}
dsi.setCategory("POI example"); System.out.println("Category changed to " + dsi.getCategory() + "."); CustomProperties customProperties = dsi.getCustomProperties(); if (customProperties == null) customProperties = new CustomProperties(); dsi.setCustomProperties(customProperties); dsi.write(dir, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
DocumentSummaryInformation summaryInfo=doc.getDocumentSummaryInformation(); String category = summaryInfo.getCategory(); String company = summaryInfo.getCompany(); int lineCount=summaryInfo.getLineCount();
meta = m_documentSummary.getCompany(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_COMPANY, meta); meta = m_documentSummary.getManager(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_MANAGER, meta); meta = m_documentSummary.getCategory(); if (CmsStringUtil.isNotEmpty(meta)) { metaInfo.put(I_CmsExtractionResult.META_CATEGORY, meta);
/**
 * <p>Stores the hidden count in the first section under
 * {@code PropertyIDMap.PID_HIDDENCOUNT}.</p>
 *
 * @param hiddenCount The hidden count to set.
 */
public void setHiddenCount(final int hiddenCount) {
    ((MutableSection) getSections().get(0))
            .setProperty(PropertyIDMap.PID_HIDDENCOUNT, hiddenCount);
}
DocumentSummaryInformation documentSummaryInformation = ps == null ? new DocumentSummaryInformation() : new DocumentSummaryInformation(ps); ph.setCategory(documentSummaryInformation.getCategory()); ph.setPresentationFormat(documentSummaryInformation.getPresentationFormat()); ph.setManager(documentSummaryInformation.getManager()); ph.setCompany(documentSummaryInformation.getCompany()); ph.setContentType(documentSummaryInformation.getContentType()); ph.setContentStatus(documentSummaryInformation.getContentStatus()); ph.setLanguage(documentSummaryInformation.getLanguage()); ph.setDocumentVersion(documentSummaryInformation.getDocumentVersion()); CustomProperties customProperties = documentSummaryInformation.getCustomProperties(); if (customProperties != null)
private void parse(DocumentSummaryInformation summary) { set(OfficeOpenXMLExtended.COMPANY, summary.getCompany()); addMulti(metadata, OfficeOpenXMLExtended.MANAGER, summary.getManager()); set(TikaCoreProperties.LANGUAGE, getLanguage(summary)); set(OfficeOpenXMLCore.CATEGORY, summary.getCategory()); // New style counts set(Office.SLIDE_COUNT, summary.getSlideCount()); if (summary.getSlideCount() > 0) { metadata.set(PagedText.N_PAGES, summary.getSlideCount()); } // Old style, Tika 1.0 counts // TODO Remove these in Tika 2.0 set(Metadata.COMPANY, summary.getCompany()); set(Metadata.MANAGER, summary.getManager()); set(MSOffice.SLIDE_COUNT, summary.getSlideCount()); set(Metadata.CATEGORY, summary.getCategory()); parse(summary.getCustomProperties()); }
System.out.println("ApplicationName: "+doc.getSummaryInformation().getApplicationName()); System.out.println("OSVersion: "+doc.getSummaryInformation().getOSVersion()); System.out.println("# paragraphs: "+doc.getDocumentSummaryInformation().getParCount()); System.out.println("# bytes: "+doc.getDocumentSummaryInformation().getByteCount()); System.out.println("# hidden: "+doc.getDocumentSummaryInformation().getHiddenCount()); System.out.println("# lines: "+doc.getDocumentSummaryInformation().getLineCount()); System.out.println("# mmclips: "+doc.getDocumentSummaryInformation().getMMClipCount()); System.out.println("# notes: "+doc.getDocumentSummaryInformation().getNoteCount()); System.out.println("# sections: "+doc.getDocumentSummaryInformation().getSectionCount()); System.out.println("# slides: "+doc.getDocumentSummaryInformation().getSlideCount()); System.out.println("format: "+doc.getDocumentSummaryInformation().getFormat()); for( TextPiece tp : doc.getTextTable().getTextPieces() ) { System.out.println("TP: "+tp.getStringBuffer().substring(0, 100)); System.out.println("TP: "+tp.getPieceDescriptor().isUnicode()); for( Object os : doc.getDocumentSummaryInformation().getSections() ) { Section s = (Section) os; System.out.println("ss# fid: "+s.getFormatID());
/**
 * Sets the presentation format.
 *
 * @param presentationFormat The presentation format to set.
 */
public void setPresentationFormat(final String presentationFormat) {
    // Stored on the first section under PropertyIDMap.PID_PRESFORMAT.
    getFirstSection().setProperty(PropertyIDMap.PID_PRESFORMAT, presentationFormat);
}
/**
 * Removes the custom properties by dropping the second section and
 * re-adding every other section in its original order.
 *
 * @throws HPSFRuntimeException if there are fewer than two sections
 */
public void removeCustomProperties() {
    final boolean hasSecondSection = getSectionCount() >= 2;
    if (!hasSecondSection) {
        throw new HPSFRuntimeException("Illegal internal format of Document SummaryInformation stream: second section is missing.");
    }

    // Copy the sections before clearSections() is called, then rebuild.
    final List<Section> snapshot = new LinkedList<>(getSections());
    clearSections();

    int position = 0;
    for (final Section candidate : snapshot) {
        final boolean isSecondSection = (position == 1);
        position++;
        if (isSecondSection) {
            continue;
        }
        addSection(candidate);
    }
}
/**
 * <p>Replaces the contents of the second section with the given custom
 * properties, creating that section first if necessary.</p>
 *
 * @param customProperties The custom properties
 */
public void setCustomProperties(final CustomProperties customProperties) {
    ensureSection2();
    final MutableSection target = (MutableSection) getSections().get(1);
    final Map<Long,String> names = customProperties.getDictionary();
    target.clear();

    /* Codepage resolution: the custom properties' codepage wins; if it is
     * negative, the section's codepage is used; if that is negative too,
     * Constants.CP_UNICODE is used. */
    int codepage = customProperties.getCodepage();
    if (codepage < 0) {
        final int fromSection = target.getCodepage();
        codepage = (fromSection >= 0) ? fromSection : Constants.CP_UNICODE;
    }

    // Keep both sides in agreement on the resolved codepage.
    customProperties.setCodepage(codepage);
    target.setCodepage(codepage);
    target.setDictionary(names);

    final Iterator<CustomProperty> remaining = customProperties.values().iterator();
    while (remaining.hasNext()) {
        final Property next = remaining.next();
        target.setProperty(next);
    }
}