/** * This will print the documents data to System.out. * * @param document The document to get the metadata from. * * @throws IOException If there is an error getting the page count. */ public void printMetadata( PDDocument document ) throws IOException { PDDocumentInformation info = document.getDocumentInformation(); PDDocumentCatalog cat = document.getDocumentCatalog(); PDMetadata metadata = cat.getMetadata(); System.out.println( "Page Count=" + document.getNumberOfPages() ); System.out.println( "Title=" + info.getTitle() ); System.out.println( "Author=" + info.getAuthor() ); System.out.println( "Subject=" + info.getSubject() ); System.out.println( "Keywords=" + info.getKeywords() ); System.out.println( "Creator=" + info.getCreator() ); System.out.println( "Producer=" + info.getProducer() ); System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) ); System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) ); System.out.println( "Trapped=" + info.getTrapped() ); if( metadata != null ) { String string = new String( metadata.toByteArray(), "ISO-8859-1" ); System.out.println( "Metadata=" + string ); } }
throws ValidationException Calendar modifyDate = dico.getModificationDate(); COSBase item = dico.getCOSObject().getItem(COSName.MOD_DATE); if (modifyDate != null && isValidPDFDateFormat(item))
@Override public Calendar getModDate() { if (info != null) { return info.getModificationDate(); } return null; }
addTextField(document, "Creator", info.getCreator()); addTextField(document, "Keywords", info.getKeywords()); addTextField(document, "ModificationDate", info.getModificationDate()); addTextField(document, "Producer", info.getProducer()); addTextField(document, "Subject", info.getSubject());
basicSchema.setModifyDate( info.getModificationDate() ); basicSchema.setCreateDate( info.getCreationDate() ); basicSchema.setCreatorTool( info.getCreator() );
addMetadata(metadata, PDF.DOC_INFO_CREATED, created); addMetadata(metadata, TikaCoreProperties.CREATED, created); Calendar modified = info.getModificationDate(); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, modified);
/** * This will print the documents data to System.out. * * @param document The document to get the metadata from. * * @throws IOException If there is an error getting the page count. */ public void printMetadata( PDDocument document ) throws IOException { PDDocumentInformation info = document.getDocumentInformation(); PDDocumentCatalog cat = document.getDocumentCatalog(); PDMetadata metadata = cat.getMetadata(); System.out.println( "Page Count=" + document.getNumberOfPages() ); System.out.println( "Title=" + info.getTitle() ); System.out.println( "Author=" + info.getAuthor() ); System.out.println( "Subject=" + info.getSubject() ); System.out.println( "Keywords=" + info.getKeywords() ); System.out.println( "Creator=" + info.getCreator() ); System.out.println( "Producer=" + info.getProducer() ); System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) ); System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) ); System.out.println( "Trapped=" + info.getTrapped() ); if( metadata != null ) { String string = new String( metadata.toByteArray(), "ISO-8859-1" ); System.out.println( "Metadata=" + string ); } }
throws ValidationException Calendar modifyDate = dico.getModificationDate(); COSBase item = dico.getCOSObject().getItem(COSName.MOD_DATE); if (modifyDate != null && isValidPDFDateFormat(item))
throws ValidationException Calendar modifyDate = dico.getModificationDate(); COSBase item = dico.getCOSObject().getItem(COSName.MOD_DATE); if (modifyDate != null && isValidPDFDateFormat(item))
addTextField(document, "Creator", info.getCreator()); addTextField(document, "Keywords", info.getKeywords()); addTextField(document, "ModificationDate", info.getModificationDate()); addTextField(document, "Producer", info.getProducer()); addTextField(document, "Subject", info.getSubject());
calValue = docinfo.getModificationDate();
basicSchema.setModifyDate( info.getModificationDate() ); basicSchema.setCreateDate( info.getCreationDate() ); basicSchema.setCreatorTool( info.getCreator() );
modificationDate = docInfo.getModificationDate(); } catch (IOException e) { modificationDate = null;
private void extractMetaData(final PDDocument pdf, final ParserFieldsBuilder metas) { metas.set(MIME_TYPE, DEFAULT_MIMETYPES[0]); final PDDocumentInformation info = pdf.getDocumentInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(SUBJECT, info.getSubject()); metas.add(AUTHOR, info.getAuthor()); metas.add(PRODUCER, info.getProducer()); metas.add(KEYWORDS, info.getKeywords()); metas.add(CREATION_DATE, info.getCreationDate()); metas.add(MODIFICATION_DATE, info.getModificationDate()); } int pages = pdf.getNumberOfPages(); metas.add(NUMBER_OF_PAGES, pages); PDDocumentCatalog catalog = pdf.getDocumentCatalog(); if (catalog != null) metas.add(LANGUAGE, catalog.getLanguage()); }
addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate()); addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, info.getModificationDate());
creator = metadata.getCreator(); keywords = metadata.getKeywords(); modificationDate = metadata.getModificationDate(); producer = metadata.getProducer(); subject = metadata.getSubject();
addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate()); addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, info.getModificationDate());
addMetadata(metadata, PDF.DOC_INFO_CREATED, created); addMetadata(metadata, TikaCoreProperties.CREATED, created); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
addMetadata(metadata, "created", info.getCreationDate()); addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
addMetadata(metadata, "created", info.getCreationDate()); addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified);