private void parse(SummaryInformation summary) { set(TikaCoreProperties.TITLE, summary.getTitle()); addMulti(metadata, TikaCoreProperties.CREATOR, summary.getAuthor()); //make sure these are retrievable specifically add(Office.KEYWORDS, summary.getKeywords()); add(OfficeOpenXMLCore.SUBJECT, summary.getSubject()); set(TikaCoreProperties.MODIFIER, summary.getLastAuthor()); set(TikaCoreProperties.COMMENTS, summary.getComments()); set(OfficeOpenXMLExtended.TEMPLATE, summary.getTemplate()); set(OfficeOpenXMLExtended.APPLICATION, summary.getApplicationName()); set(OfficeOpenXMLCore.REVISION, summary.getRevNumber()); set(TikaCoreProperties.CREATED, summary.getCreateDateTime()); set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime()); set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted()); set(OfficeOpenXMLExtended.TOTAL_TIME, Long.toString(summary.getEditTime())); set(OfficeOpenXMLExtended.DOC_SECURITY, summary.getSecurity()); // New style counts set(Office.WORD_COUNT, summary.getWordCount()); set(Office.CHARACTER_COUNT, summary.getCharCount()); set(Office.PAGE_COUNT, summary.getPageCount()); if (summary.getPageCount() > 0) { metadata.set(PagedText.N_PAGES, summary.getPageCount()); } }
// Appends five entries read from `si` to `text`: last-printed, creation and last-save
// timestamps, followed by the page and word counts. Each entry starts with a newline and a
// fixed label, and the getter result is string-concatenated onto that label.
text.append("\nLast Printed: " + si.getLastPrinted()); text.append("\nCreate Date/Time: " + si.getCreateDateTime()); text.append("\nLast Save Date/Time: " + si.getLastSaveDateTime()); text.append("\nPage Count: " + si.getPageCount()); text.append("\nWord Count: " + si.getWordCount());
// Appends five entries read from `si` to `text`: last-printed, creation and last-save
// timestamps, followed by the page and word counts. Each entry starts with a newline and a
// fixed label, and the getter result is string-concatenated onto that label.
text.append("\nLast Printed: " + si.getLastPrinted()); text.append("\nCreate Date/Time: " + si.getCreateDateTime()); text.append("\nLast Save Date/Time: " + si.getLastSaveDateTime()); text.append("\nPage Count: " + si.getPageCount()); text.append("\nWord Count: " + si.getWordCount());
// Appends five entries read from `si` to `text`: last-printed, creation and last-save
// timestamps, followed by the page and word counts. Each entry starts with a newline and a
// fixed label, and the getter result is string-concatenated onto that label.
text.append("\nLast Printed: " + si.getLastPrinted()); text.append("\nCreate Date/Time: " + si.getCreateDateTime()); text.append("\nLast Save Date/Time: " + si.getLastSaveDateTime()); text.append("\nPage Count: " + si.getPageCount()); text.append("\nWord Count: " + si.getWordCount());
// Reads the last-save timestamp from m_summary. It is stored in metaInfo under
// META_DATE_LASTMODIFIED only when it is non-null and its getTime() value is positive.
// (The if-block opened here continues beyond this excerpt.)
date = m_summary.getLastSaveDateTime(); if ((date != null) && (date.getTime() > 0)) { metaInfo.put(I_CmsExtractionResult.META_DATE_LASTMODIFIED, date);
// Reads the last-save timestamp and flags a match (matchOnce) when it is non-null and its
// toString() form matches, as a whole, the pattern looked up in hocr's metadata filter
// under corePropertyName.
// (The if-block opened here continues beyond this excerpt.)
Date coreProp=summaryInfo.getLastSaveDateTime(); if ((coreProp!=null) && (coreProp.toString().matches(this.hocr.getMetaDataFilter().get(corePropertyName)))) { matchOnce=true;
// Store the last-save timestamp under DCMetaData.DATE, but only when one is present.
if (null != si.getLastSaveDateTime()) {
    props.put(DCMetaData.DATE, si.getLastSaveDateTime());
}
/**
 * Populates this object's summary fields from the supplied summary information.
 * Every value is stored exactly as the corresponding getter returns it.
 *
 * @param si the summary information to read from
 */
public void setSummaryInformation(SummaryInformation si) {
    // Descriptive text
    title = si.getTitle();
    subject = si.getSubject();
    author = si.getAuthor();
    keywords = si.getKeywords();
    comment = si.getComments();

    // Template and creating application
    template = si.getTemplate();
    creatingApplication = si.getApplicationName();

    // Timestamps
    created = si.getCreateDateTime();
    lastSaved = si.getLastSaveDateTime();
    lastPrinted = si.getLastPrinted();

    // Revision tracking
    revision = si.getRevNumber();
    totalEditingTime = si.getEditTime();

    // Size statistics
    pages = si.getPageCount();
    words = si.getWordCount();
    characters = si.getCharCount();

    // Preview data
    thumbnail = si.getThumbnail();
}
/**
 * Populates this object's summary fields from the supplied summary information.
 * Every value is stored exactly as the corresponding getter returns it.
 *
 * @param si the summary information to read from
 */
public void setSummaryInformation(SummaryInformation si) {
    // Descriptive text
    title = si.getTitle();
    subject = si.getSubject();
    author = si.getAuthor();
    keywords = si.getKeywords();
    comment = si.getComments();

    // Template and creating application
    template = si.getTemplate();
    creatingApplication = si.getApplicationName();

    // Timestamps
    created = si.getCreateDateTime();
    lastSaved = si.getLastSaveDateTime();
    lastPrinted = si.getLastPrinted();

    // Revision tracking
    revision = si.getRevNumber();
    totalEditingTime = si.getEditTime();

    // Size statistics
    pages = si.getPageCount();
    words = si.getWordCount();
    characters = si.getCharCount();

    // Preview data
    thumbnail = si.getThumbnail();
}
/**
 * Reads an {@code HSSFWorkbook} from the stream, publishes its summary metadata and adds
 * the extracted text as a new document.
 *
 * @param parameters    request parameters (not read by this method)
 * @param inputStream   stream the workbook is loaded from
 * @param extension     file extension, passed to {@code findMimeType}
 * @param mimeType      declared MIME type, passed to {@code findMimeType}
 * @param resultBuilder receives the metadata fields and the content document
 * @throws Exception if loading the workbook or extracting from it fails
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {
    // The workbook is declared as a resource too: previously it lived outside the try and
    // was never closed by this method, notably when constructing the extractor failed.
    // Resources are released in reverse order (extractor first, then workbook).
    try (final HSSFWorkbook workbook = new HSSFWorkbook(inputStream);
            final ExcelExtractor excel = new ExcelExtractor(workbook)) {

        final ParserFieldsBuilder metas = resultBuilder.metas();
        metas.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

        // Summary information may be absent; metadata fields are then simply not added.
        final SummaryInformation info = excel.getSummaryInformation();
        if (info != null) {
            metas.add(TITLE, info.getTitle());
            metas.add(AUTHOR, info.getAuthor());
            metas.add(SUBJECT, info.getSubject());
            metas.add(CREATION_DATE, info.getCreateDateTime());
            metas.add(MODIFICATION_DATE, info.getLastSaveDateTime());
            metas.add(KEYWORDS, info.getKeywords());
        }

        final ParserFieldsBuilder result = resultBuilder.newDocument();
        result.add(CONTENT, excel.getText());
        result.add(LANG_DETECTION, languageDetection(result, CONTENT, 10000));
    }
}
// Closes the enclosing type, whose header lies outside this excerpt.
}
/**
 * Extracts summary metadata and text through a {@code PublisherTextExtractor} opened on
 * the stream. A content document is only created when the extracted text is non-empty.
 *
 * @param parameters    request parameters (not read by this method)
 * @param inputStream   stream handed to the extractor
 * @param extension     file extension, passed to {@code findMimeType}
 * @param mimeType      declared MIME type, passed to {@code findMimeType}
 * @param resultBuilder receives the metadata fields and the content document
 * @throws Exception if the extractor cannot be created or fails while reading
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {
    try (final PublisherTextExtractor publisher = new PublisherTextExtractor(inputStream)) {

        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

        final SummaryInformation summary = publisher.getSummaryInformation();
        if (summary != null) {
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
            metaFields.add(CREATION_DATE, summary.getCreateDateTime());
            metaFields.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are filed under CONTENT here; confirm this is intended
            // rather than a dedicated keywords field.
            metaFields.add(CONTENT, summary.getKeywords());
            metaFields.add(COMMENTS, summary.getComments());
        }

        final String body = publisher.getText();
        if (!StringUtils.isEmpty(body)) {
            final ParserFieldsBuilder document = resultBuilder.newDocument();
            document.add(CONTENT, body);
            document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
        }
    }
}
// Closes the enclosing type, whose header lies outside this excerpt.
}
/**
 * Extracts metadata and paragraph text through a {@code WordExtractor} opened on the
 * stream. Metadata, including the MIME type, is only written when summary information is
 * available; a content document is always created.
 *
 * @param inputStream   stream handed to the extractor
 * @param resultBuilder receives the metadata fields and the content document
 * @throws IOException if the extractor cannot be created or closed
 */
private void currentWordExtraction(final InputStream inputStream, final ParserResultBuilder resultBuilder)
        throws IOException {
    try (final WordExtractor extractor = new WordExtractor(inputStream)) {

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            final ParserFieldsBuilder metaFields = resultBuilder.metas();
            metaFields.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
            metaFields.add(CREATION_DATE, summary.getCreateDateTime());
            metaFields.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metaFields.add(KEYWORDS, summary.getKeywords());
        }

        final ParserFieldsBuilder content = resultBuilder.newDocument();
        final String[] paragraphs = extractor.getParagraphText();
        // Each paragraph becomes its own CONTENT value; a null array adds nothing.
        if (paragraphs != null) {
            for (final String paragraph : paragraphs) {
                content.add(CONTENT, paragraph);
            }
        }
        content.add(LANG_DETECTION, languageDetection(content, CONTENT, 10000));
    }
}
/**
 * Extracts summary metadata and text through a {@code VisioTextExtractor} opened on the
 * stream. A content document is only created when the extractor returns a text array.
 *
 * @param parameters    request parameters (not read by this method)
 * @param inputStream   stream handed to the extractor
 * @param extension     file extension (not read by this method)
 * @param mimeType      declared MIME type (not read by this method)
 * @param resultBuilder receives the metadata fields and the content document
 * @throws Exception if the extractor cannot be created or fails while reading
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception {
    try (final VisioTextExtractor visio = new VisioTextExtractor(inputStream)) {

        final SummaryInformation summary = visio.getSummaryInformation();
        if (summary != null) {
            final ParserFieldsBuilder metaFields = resultBuilder.metas();
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
            metaFields.add(CREATION_DATE, summary.getCreateDateTime());
            metaFields.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are filed under CONTENT here; confirm this is intended
            // rather than a dedicated keywords field.
            metaFields.add(CONTENT, summary.getKeywords());
            metaFields.add(COMMENTS, summary.getComments());
        }

        final String[] pieces = visio.getAllText();
        if (pieces != null) {
            final ParserFieldsBuilder document = resultBuilder.newDocument();
            for (final String piece : pieces) {
                document.add(CONTENT, piece);
            }
            document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
        }
    }
}
// Closes the enclosing type, whose header lies outside this excerpt.
}
// Copies revision, creation date, last-saved date, application name and editing time from
// summaryInformation onto ph. The revision string goes through NumberHelper.parseInteger.
// NOTE(review): the edit time is narrowed with an (int) cast before boxing; if
// getEditTime() returns a long, values beyond the int range will not survive the cast.
// Confirm the expected range and units of setEditingTime.
ph.setRevision(NumberHelper.parseInteger(summaryInformation.getRevNumber())); ph.setCreationDate(summaryInformation.getCreateDateTime()); ph.setLastSaved(summaryInformation.getLastSaveDateTime()); ph.setShortApplicationName(summaryInformation.getApplicationName()); ph.setEditingTime(Integer.valueOf((int) summaryInformation.getEditTime()));
// Copies revision, creation date, last-saved date, application name and editing time from
// summaryInformation onto ph. The revision string goes through NumberHelper.parseInteger.
// NOTE(review): the edit time is narrowed with an (int) cast before boxing; if
// getEditTime() returns a long, values beyond the int range will not survive the cast.
// Confirm the expected range and units of setEditingTime.
ph.setRevision(NumberHelper.parseInteger(summaryInformation.getRevNumber())); ph.setCreationDate(summaryInformation.getCreateDateTime()); ph.setLastSaved(summaryInformation.getLastSaveDateTime()); ph.setShortApplicationName(summaryInformation.getApplicationName()); ph.setEditingTime(Integer.valueOf((int) summaryInformation.getEditTime()));
// Passes the summary's revision number, creation / last-save / last-printed timestamps and
// edit time to the set(...) helper under their respective property keys.
set(OfficeOpenXMLCore.REVISION, summary.getRevNumber()); set(TikaCoreProperties.CREATED, summary.getCreateDateTime()); set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime()); set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted()); set(Metadata.EDIT_TIME, summary.getEditTime());
// Passes the summary's revision number, creation / last-save / last-printed timestamps and
// edit time to the set(...) helper under their respective property keys.
set(OfficeOpenXMLCore.REVISION, summary.getRevNumber()); set(TikaCoreProperties.CREATED, summary.getCreateDateTime()); set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime()); set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted()); set(Metadata.EDIT_TIME, summary.getEditTime());